Example #1
def predict(model_data_path, port):
    """ starts are server"""

    tDetector = TensoflowFaceDector(model_data_path)

    # setup our server
    image_hub = imagezmq.ImageHub(open_port='tcp://*:' + port)

    print("Server Started on port {}..\n".format(port))

    while True:
        _, image = image_hub.recv_image()

        (boxes, scores, classes, num_detections) = tDetector.run(image)

        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)

        image_hub.send_image('OK', image)
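For context, a minimal client counterpart to the server above could look like the sketch below. This is not part of the original example: it assumes imagezmq's stock ImageSender API, the address and camera index are placeholders, and the content of the hub's reply depends on how the hub answers.

# Hypothetical client for Example #1 (illustrative only; address is a placeholder)
import cv2
import imagezmq

def stream_to_server(address='tcp://127.0.0.1:5555', max_frames=100):
    sender = imagezmq.ImageSender(connect_to=address)
    cap = cv2.VideoCapture(0)
    for _ in range(max_frames):
        ret, frame = cap.read()
        if not ret:
            break
        # send_image blocks until the hub replies; the hub above answers by
        # sending back the annotated frame
        reply = sender.send_image('camera-0', frame)
    cap.release()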
Example #2
def getResults(requestData):
    # Takes the image as input, posts it to the specified URL, and visualizes the returned detections
    imgdata = base64.b64decode(requestData["data"])
    data = Image.open(io.BytesIO(imgdata))
    image = cv2.cvtColor(np.array(data), cv2.COLOR_BGR2RGB)
    [h, w] = image.shape[:2]

    headers = {
        'Accept': 'application/octet-stream',
        'content-type': 'application/json'
    }
    start_time1 = time.time()
    r = requests.post(url=URL, data=json.dumps(requestData), headers=headers)
    elapsed_time1 = time.time() - start_time1
    print("Time to get the results : ", elapsed_time1)
    data = r.json()
    data1 = ast.literal_eval(data)

    # visualization of the results
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(data1["boxes"]),
        np.squeeze(data1["classes"]).astype(np.int32),
        np.squeeze(data1["scores"]),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=4)

    cv2.namedWindow("Results", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("Results", w, h)
    cv2.imshow("Results", image)
    k = cv2.waitKey(4000) & 0xff
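A hedged sketch of how the requestData payload consumed by getResults might be assembled; this is not from the original source and assumes the service expects the base64-encoded image under the "data" key (as the decode step above implies). The file path is a placeholder.

# Hypothetical payload builder for Example #2 (illustrative only)
import base64

def build_request(image_path='face.jpg'):
    with open(image_path, 'rb') as f:
        encoded = base64.b64encode(f.read()).decode('utf-8')
    # getResults() base64-decodes requestData["data"] before visualizing the response
    return {'data': encoded}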
Example #3
def predict_from_video(detector, file_path):
    vs = FileVideoStream(file_path).start()
    time.sleep(2.0)
    time.sleep(2.0)
    fps = FPS().start()
    out = None
    frame_count = 0

    while vs.more():
        # grab the frame from the threaded video stream
        image = vs.read()
        frame_count += 1

        if out is None:
            [h, w] = image.shape[:2]
            out = cv2.VideoWriter("test_out.avi", 0, 25.0, (w, h))

        # Run detection on every second frame
        if frame_count == 2:
            frame_count = 0

            boxes, scores, classes, num_detections, emotions = detector.run(
                image)

            text = "classes: {}".format(emotions)
            cv2.putText(image,
                        text,
                        org=(25, 25),
                        fontFace=cv2.FONT_HERSHEY_DUPLEX,
                        fontScale=0.35,
                        color=(0, 255, 0))

            vis_util.visualize_boxes_and_labels_on_image_array(
                image,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=1)

        out.write(image)
        fps.update()
    # stop the timer and display FPS information
    fps.stop()
    print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
    # do a bit of cleanup
    cv2.destroyAllWindows()
    vs.stop()
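One aside, not from the original example: cv2.VideoWriter is created above with fourcc=0, which generally falls back to an uncompressed or container-default codec. Assuming the XVID codec is available on the machine, an explicit codec could be requested instead:

# Illustrative alternative to fourcc=0 (assumes XVID is installed)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter("test_out.avi", fourcc, 25.0, (w, h))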
Example #4
def predict_from_camera(detector):
    print('Press q to exit')
    vs = WebcamVideoStream(src=0).start()
    fps = FPS().start()
    window_not_set = True

    while True:
        # grab the frame from the threaded video stream
        image = vs.read()
        [h, w] = image.shape[:2]
        image = cv2.flip(image, 1)

        boxes, scores, classes, num_detections, emotions_print = detector.run(
            image)

        text = "classes: {}".format(emotions_print)
        cv2.putText(image,
                    text,
                    org=(25, 25),
                    fontFace=cv2.FONT_HERSHEY_DUPLEX,
                    fontScale=0.35,
                    color=(0, 255, 0))

        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=1)

        if window_not_set is True:
            cv2.namedWindow("tensorflow based (%d, %d)" % (w, h),
                            cv2.WINDOW_NORMAL)
            window_not_set = False

        cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)
        k = cv2.waitKey(1) & 0xff
        if k == ord('q') or k == 27:
            break
        fps.update()
    # stop the timer and display FPS information
    fps.stop()
    print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
    # do a bit of cleanup
    cv2.destroyAllWindows()
    vs.stop()
Example #5
 def get_frame(self):
     success, image = self.video.read()
     img = cv.remap(image,self.map_x,self.map_y,cv.INTER_LINEAR)
     # We are using Motion JPEG, but OpenCV defaults to capture raw images,
     # so we must encode it into JPEG in order to correctly display the
     # video stream.
     # face detection
     (boxes, scores, classes, num_detections) = tDetector.run(img)
     vis_util.visualize_boxes_and_labels_on_image_array(img,
             np.squeeze(boxes),
             np.squeeze(classes).astype(np.int32),
             np.squeeze(scores),
             category_index,
             use_normalized_coordinates=True,
             line_thickness=4)
     ret, jpeg = cv.imencode('.jpg', img)
     return jpeg.tobytes()
Example #6
def run_webcam(q):
    if len(sys.argv) != 2:
        print("usage:%s (cameraID | filename) Detect faces\
	in the video example:%s 0" % (sys.argv[0], sys.argv[0]))
        exit(1)

    try:
        camID = int(sys.argv[1])

    except:
        camID = sys.argv[1]

    tDetector = TensoflowFaceDector(PATH_TO_CKPT)

    cap = cv2.VideoCapture(camID)
    windowNotSet = True
    motionornot = MotionorNot()
    pd = DataFrame({'motionornot': []})
    data = []

    while True:
        ret, image = cap.read()
        if ret == 0:
            break

        [h, w] = image.shape[:2]
        print(h, w)
        image = cv2.flip(image, 1)

        (boxes, scores, classes, num_detections) = tDetector.run(image)

        a = vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)

        motionornot.motion(a)

        if windowNotSet is True:
            cv2.namedWindow("tensorflow based (%d, %d)" % (w, h),
                            cv2.WINDOW_NORMAL)
            windowNotSet = False

        cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)
        k = cv2.waitKey(1) & 0xff
        if k == ord('q') or k == 27:
            break

    cap.release()
    pd["motionornot"] = [
        range(len(motionornot.motionornot_smooth)),
        motionornot.motionornot_smooth
    ]
    pd.to_pickle("sleep_data.pkl")
Example #7
def main():
    images = glob.glob(os.path.join('bad_images/', '*.jpg'))
    tDetector = TensoflowFaceDector(PATH_TO_CKPT)
    for index in range(len(images)):
        image = cv2.imread(images[index])
        [h, w] = image.shape[:2]
        (boxes, scores, classes, num_detections) = tDetector.run(image)
        id = images[index].split('/')[1].split('.')[0] + ".jpg"

        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)
        cv2.imwrite('TEST/{}'.format(id), image)
        print(avr_time / count)
Example #8
def detect_face():
    tDetector = TensoflowFaceDector(PATH_TO_CKPT)

    # cap = cv2.VideoCapture(camID)
    cap = cv2.VideoCapture(0)
    windowNotSet = True
    while True:
        ret, image = cap.read()
        if ret == 0:
            break

        print(image.shape)
        [h, w] = image.shape[:2]
        print("#============================#")
        print(h, w)
        print("#============================#")
        image = cv2.flip(image, 1)

        (boxes, scores, classes, num_detections) = tDetector.run(image)

        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)

        if windowNotSet is True:
            cv2.namedWindow("tensorflow based (%d, %d)" % (w, h),
                            cv2.WINDOW_NORMAL)
            windowNotSet = False

        cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)
        k = cv2.waitKey(1) & 0xff
        if k == ord('q') or k == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
Example #9
def detect_photo(photo_name):
    file_name = photo_name
    print(file_name)
    tDetector = TensoflowFaceDector(PATH_TO_CKPT)

    # cap = cv2.VideoCapture(camID)
    #cap = cv2.VideoCapture(0)
    windowNotSet = True
    image = cv2.imread('%s' % file_name)

    # if ret == 0:
    #     print("error")
    [h, w] = image.shape[:2]
    print(h, w)
    image = cv2.flip(image, 1)

    (boxes, scores, classes, num_detections) = tDetector.run(image)
    # print(np.squeeze(boxes))
    # print(np.squeeze(scores))
    # print(num_detections)
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=4)

    if windowNotSet is True:
        cv2.namedWindow("tensorflow based (%d, %d)" % (w, h),
                        cv2.WINDOW_NORMAL)
        windowNotSet = False

    cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)
    k = cv2.waitKey(1) & 0xff
    if k == ord('q') or k == 27:
        cv2.destroyAllWindows()
Example #10
def detect_objects(image,
                   thresh,
                   detection_graph,
                   sess,
                   category_index,
                   matched_area=None,
                   sequence_sorted=False,
                   sequence_type='char'):
    image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    start_time = time.time()
    if image_np_expanded[0] is not None:
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})
        elapsed_time = time.time() - start_time
        if DEBUG_TIME:
            print('cnn', elapsed_time)
        box = vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            min_score_thresh=thresh,
            use_normalized_coordinates=True,
            line_thickness=4,
            sequence_sorted=sequence_sorted,
            sequence_type=sequence_type,
            matched_area=matched_area)
    else:
        box = {'sequence': '', 'objects': []}
    return box
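A hedged sketch of how detect_objects above might be driven, assuming a TF1-style frozen graph at a placeholder path and a module-level category_index, mirroring the graph-loading pattern used elsewhere in these examples:

# Hypothetical caller for Example #10 (illustrative only; paths are placeholders)
import cv2
import tensorflow as tf

def run_detect_objects_once(image_path, path_to_ckpt, thresh=0.5):
    graph = tf.Graph()
    with graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(path_to_ckpt, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
            tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=graph)
    image = cv2.imread(image_path)
    # detect_objects converts BGR to RGB and adds the batch dimension itself
    return detect_objects(image, thresh, graph, sess, category_index)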
Example #11
def main():

	# open the camera,load the cnn model 
	camera = cv2.VideoCapture(0)

	# blinks is the number of total blinks ,close_counter
	# the counter for consecutive close predictions
	# # and mem_counter the counter of the previous loop 
	# close_counter, mem_counter, blinks, state = prop

	close_counter = blinks = mem_counter= 0
	state = ''
	tDetector = TensoflowFaceDector(PATH_TO_CKPT)
	sess = tf.Session()

	model = load_model('./model/2018_12_17_22_58_35.h5')
	
	while True:

		flag=True
		ret, frame = camera.read()

		[h, w] = frame.shape[:2]
		frame = cv2.flip(frame, 1)
		(boxes, scores, classes, num_detections) = tDetector.run(frame)

		vis_util.visualize_boxes_and_labels_on_image_array(
								frame,
								np.squeeze(boxes),
								np.squeeze(classes).astype(np.int32),
								np.squeeze(scores),
								category_index,
								use_normalized_coordinates=True,
						max_boxes_to_draw=1,
						min_score_thresh=0.4,
						line_thickness=4)

		# detect eyes
		if scores[0][0] > 0.2:

			# keep the face region from the whole frame
			face_rect = dlib.rectangle(left = int(boxes[0,0,1]*w), top = int(boxes[0,0,0]*h),
													right = int(boxes[0,0,3]*w), bottom = int(boxes[0,0,2]*h))

			FACE_RECT = np.rint([int(boxes[0,0,1]*w), int(boxes[0,0,0]*h), int(boxes[0,0,3]*w), int(boxes[0,0,2]*h)]).astype(np.int)
			FACE = np.array(cv2.cvtColor(frame[FACE_RECT[1]:FACE_RECT[3], FACE_RECT[0]:FACE_RECT[2]], cv2.COLOR_BGR2RGB), dtype=np.uint8)

			im_face.set_data(FACE)

			input_image = np.zeros(frame.shape)
			input_image[FACE_RECT[1]:FACE_RECT[3], FACE_RECT[0]:FACE_RECT[2]] = np.ones((FACE_RECT[3]-FACE_RECT[1], FACE_RECT[2]-FACE_RECT[0], 3))

			FACE = transform.resize(FACE, output_shape=img_size, preserve_range=True)/255.
			FACE_MASK = input_image
			FACE_MASK = transform.resize(input_image, output_shape=mask_size, preserve_range=True)
			im_face_mask.set_data(FACE_MASK)

			gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
			shape = predictor(gray, face_rect)
			shape = face_utils.shape_to_np(shape)

			eye_img_l, eye_rect_l = crop_eye(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), eye_points=shape[36:42])
			eye_img_r, eye_rect_r = crop_eye(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), eye_points=shape[42:48])

			LEFT_EYE = transform.resize(eye_img_l, output_shape=img_size)
			RIGHT_EYE = transform.resize(eye_img_r, output_shape=img_size)

			im_l_eye.set_data(LEFT_EYE)
			im_r_eye.set_data(RIGHT_EYE)

			plt.pause(0.00001)
			plt.show()

			LEFT_EYE = LEFT_EYE.reshape(1, *img_size, 3)
			RIGHT_EYE = RIGHT_EYE.reshape(1, *img_size, 3)
			FACE = FACE.reshape(1, *img_size, 3)
			FACE_MASK = FACE_MASK[:, :, :1]
			FACE_MASK = FACE_MASK.reshape(1, 625)

			
			val_data = [LEFT_EYE, RIGHT_EYE, FACE, FACE_MASK]

			eye_img_l = cv2.resize(cv2.cvtColor(eye_img_l, cv2.COLOR_BGR2GRAY), dsize=IMG_SIZE)
			eye_img_r = cv2.resize(cv2.cvtColor(eye_img_r, cv2.COLOR_BGR2GRAY), dsize=IMG_SIZE)
			eye_img_r = cv2.flip(eye_img_r, flipCode=1)


			eye_input_l = eye_img_l.copy().reshape((1, IMG_SIZE[1], IMG_SIZE[0], 1)).astype(np.float32) / 255.
			eye_input_r = eye_img_r.copy().reshape((1, IMG_SIZE[1], IMG_SIZE[0], 1)).astype(np.float32) / 255.

			pred_l = model.predict(eye_input_l)
			pred_r = model.predict(eye_input_r)

			# visualize
			state_l = 'O %.1f' if pred_l > 0.1 else '- %.1f'
			state_r = 'O %.1f' if pred_r > 0.1 else '- %.1f'

			state_l = state_l % pred_l
			state_r = state_r % pred_r

			pred = (pred_l+pred_r)/2

			cv2.rectangle(frame, pt1=tuple(eye_rect_l[0:2]), pt2=tuple(eye_rect_l[2:4]), color=(255,255,255), thickness=2)
			cv2.rectangle(frame, pt1=tuple(eye_rect_r[0:2]), pt2=tuple(eye_rect_r[2:4]), color=(255,255,255), thickness=2)

			cv2.putText(frame, state_l, tuple(eye_rect_l[0:2]), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,255), 2)
			cv2.putText(frame, state_r, tuple(eye_rect_r[0:2]), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,255), 2)

			# blinks
			# if the eyes are open reset the counter for close eyes
			
			if pred > 0.3 :
				state = 'open'
				close_counter = 0
			else:
				state = 'close'
				close_counter += 1
			
			# if the eyes are open and were previously closed
			# for a sufficient number of frames, then increment
			# the total blinks
			if state == 'open' and mem_counter > 1:
				blinks += 1
			# keep the counter for the next loop 
			mem_counter = close_counter 
			# draw the total number of blinks on the frame along with
			# the state for the frame
			cv2.putText(frame, "Blinks: {}".format(blinks), (10, 30),
					cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
			cv2.putText(frame, "State: {}".format(state), (300, 30),
					cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
			# show the frame

		cv2.imshow('Blinks Counter', frame)

		if cv2.waitKey(1) == ord('q'):
			break
	# return (frame, [], [], [], [])
	cv2.destroyAllWindows()
	del(camera)
Example #12
                    dist_list = []
                    for key in face_image_dic:
                        # print(f"face_image_dic_key_embedding shape : {face_image_dic[key]['embedding'].shape}")
                        # print(f"emb_array shape : {emb_array.shape}")
                        dist_list.append(
                            np.linalg.norm(face_image_dic[key]["embedding"] -
                                           emb_array))

                    dist_list = np.array(dist_list)
                    classes[idx] = np.argmin(dist_list) + 2  # offset
                else:
                    classes[idx] = 1  # unknown

            # print(f"category_index : {category_index}")

            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                #          image_np,
                image,
                boxes,
                classes,
                scores,
                category_index,
                use_normalized_coordinates=True,
                line_thickness=4)
            out.write(image)

        cap.release()
        out.release()
Example #13
def detect_face(PATH_TO_VIDEO, number, count, f_sess, f_detection_graph, f_category_index):
    code_start = time.time()

    num = number
    c = count
    cap = cv2.VideoCapture(PATH_TO_VIDEO)
    face_list = []

    # with f_detection_graph.as_default():
    #   config = tf.ConfigProto()
    #   config.gpu_options.allow_growth = True
    #   with tf.Session(graph=f_detection_graph, config=config) as f_sess:
    frame_num = 300

    print("얼굴 찾는 중..")
    while frame_num:
        frame_num -= 1
        ret, image = cap.read()
        if ret == 0:
            break
        # video_name = video.split('.mp4')[0]
        # if out is None:
        #     [h, w] = image.shape[:2]
        #     # create the output video with the name below
        #     out = cv2.VideoWriter(video_name+"_out.mp4", 0, 25.0, (w, h))
        if (int(cap.get(1)) % num == c):
            image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # the array based representation of the image will be used later in order to prepare the
            # face_result image with boxes and labels on it.
            # Expand dimensions since the face_model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)

            image_tensor = f_detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = f_detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each detected object.
            # Score is shown on the face_result image, together with the class label.
            scores = f_detection_graph.get_tensor_by_name('detection_scores:0')
            classes = f_detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = f_detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            # start_time = time.time()
            (boxes, scores, classes, num_detections) = f_sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})

            # elapsed_time = time.time() - start_time
            # print('face inference time cost: {}'.format(elapsed_time))
            # print(str(count)+', boxes.shape, boxes : '+ str(boxes.shape), str(boxes))
            # print('scores.shape,scores : '+ str(scores.shape),str(scores))
            # print('classes.shape,classes : ' + str(classes.shape),str(classes))
            # print('num_detections : ' +str(num_detections))

            # ========================================================
            # Visualization of the results of a detection.
            left, right, top, bottom = vis_util.visualize_boxes_and_labels_on_image_array(
                #          image_np,
                image,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                f_category_index,
                use_normalized_coordinates=True,
                line_thickness=4)
            # if left != 0 :
            #     crop_img = image[int(top):int(bottom), int(left):int(right)]
            #     image_list = crop_img
            # cv2.imwrite("./test_videos/"+str(len(image_list))+".jpg", crop_img)
            face_result = [left, right, top, bottom]
            # print(str(face_result))
            # print("face !!! : ", face_result)
            # if left != 0 and right != 0 and top != 0 and bottom != 0:
            #     print('no face found')
            face_list += [face_result]
        # write the output video with detected faces
        # out.write(image)
    # print("Total time : "+str(time.time() - code_start))
    # cap.release()
    # out.release()
    load_face_model_time = time.time() - code_start

    return face_list, load_face_model_time
Example #14
def recog(vid_dir):
    

    vid_no = 1
    frame_count = 0
    im_saved = 0
    total_image = 0
    
    curr_dir = os.getcwd()
    #enter the directory where the videos are stored
    vid_dir = "C:/Users/Mukund/Downloads/Models/tensorflow_model/videos"

    
    #create a temporary directory to hold the images
    if os.path.exists(curr_dir +"/raw_data"):
        pass
    else:
        os.makedirs(curr_dir +"/raw_data")
    image_dir = curr_dir + "/raw_data"
    
    
    #output directory where you want the unique faces to be stored
    if os.path.exists(curr_dir +"/unique_data"):
        pass
    else:
        os.makedirs(curr_dir + "/unique_data")
    output_dir  = curr_dir + "/unique_data"
    videos = os.listdir(vid_dir)

    # comment out the call below if you don't want a highlight video of motion detection
    check_motion(vid_dir)
    
    for file in videos:

        vid_path = vid_dir + "/" + file
        
        ctime = datetime.fromtimestamp(os.path.getctime(vid_path)).hour
        
        time_data = []     
        img_no = 0

        tDetector = TensoflowFaceDector(PATH_TO_CKPT)
        
        cap = cv2.VideoCapture(vid_path)
        print("video loaded")
        windowNotSet = True
        
        fps = cap.get(cv2.CAP_PROP_FPS)
        
        while True:
        
            ret, image = cap.read()
            frame_count += 1
            if ret == False:
                break
            
            vid_time = frame_count/(fps * 60 * 60)
            if True :

                copy_image = image
                [h, w] = image.shape[:2]
                (boxes, scores, classes, num_detections) = tDetector.run(copy_image)
                
                total_boxes = vis_util.visualize_boxes_and_labels_on_image_array(
                    copy_image,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=4)
            
                for box in total_boxes:
                    ymin = int(box[0] * h)
                    xmin = int(box[1] * w)
                    ymax = int(box[2] * h)
                    xmax = int(box[3] * w)
                    cropped = image[ymin-10:ymax+10, xmin-10:xmax+10]
                    
                    cv2.imwrite(image_dir + "/" + str(img_no) + '.jpg', cropped)
                    time_data.extend([vid_time + ctime])
                    img_no += 1     
                
                    
                if windowNotSet is True:
                    cv2.namedWindow("tensorflow based (%d, %d)" % (w, h), cv2.WINDOW_NORMAL)
                    windowNotSet = False
                    
                cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)
                k = cv2.waitKey(1) & 0xff
                if k == ord('q') or k == 27:
                    break
        cap.release()
        cv2.destroyAllWindows()
        total_image += img_no
        im_saved += check_encodings(image_dir,output_dir,vid_no,time_data)
        vid_no += 1
    print("total number of times a face was detected:", total_image)
    print("total unique individuals found", im_saved)
Example #15
def main():

    tDetector = TensoflowFaceDector(PATH_TO_CKPT)
    sess = tf.compat.v1.Session()
    val_ops = itracker_adv.load_model(sess, PATH_TO_META_GRAPH)
    model = load_model('./model/2018_12_17_22_58_35.h5')

    final = dict()

    final['train_eye_left'] = np.array([])
    final['train_eye_right'] = np.array([])
    final['train_face'] = np.array([])
    final['train_face_mask'] = np.array([])
    final['train_y'] = np.array([])

    numpy_file = 'np.npz'

    if os.path.exists(numpy_file):
        numpy_array = np.load(numpy_file)
        final['train_eye_left'] = numpy_array['train_eye_left']
        final['train_eye_right'] = numpy_array['train_eye_right']
        final['train_face'] = numpy_array['train_face']
        final['train_face_mask'] = numpy_array['train_face_mask']
        final['train_y'] = numpy_array['train_y']

    print('Starting the process...')

    for file in img_files:

        file = ('/').join(file.split('\\'))
        print(file)
        frame = cv2.imread(file)

        [h, w] = frame.shape[:2]
        frame = cv2.flip(frame, 1)
        (boxes, scores, classes, num_detections) = tDetector.run(frame)

        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=1,
            min_score_thresh=0.4,
            line_thickness=4)

        # detect eyes
        if scores[0][0] > 0.2:

            # keep the face region from the whole frame
            face_rect = dlib.rectangle(left=int(boxes[0, 0, 1] * w),
                                       top=int(boxes[0, 0, 0] * h),
                                       right=int(boxes[0, 0, 3] * w),
                                       bottom=int(boxes[0, 0, 2] * h))

            FACE_RECT = np.rint([
                int(boxes[0, 0, 1] * w),
                int(boxes[0, 0, 0] * h),
                int(boxes[0, 0, 3] * w),
                int(boxes[0, 0, 2] * h)
            ]).astype(np.int)
            FACE = np.array(cv2.cvtColor(
                frame[FACE_RECT[1]:FACE_RECT[3], FACE_RECT[0]:FACE_RECT[2]],
                cv2.COLOR_BGR2RGB),
                            dtype=np.uint8)

            input_image = np.zeros(frame.shape)
            input_image[FACE_RECT[1]:FACE_RECT[3],
                        FACE_RECT[0]:FACE_RECT[2]] = np.ones(
                            (FACE_RECT[3] - FACE_RECT[1],
                             FACE_RECT[2] - FACE_RECT[0], 3))

            FACE = transform.resize(FACE,
                                    output_shape=img_size,
                                    preserve_range=True)
            FACE_MASK = input_image
            FACE_MASK = transform.resize(input_image,
                                         output_shape=mask_size,
                                         preserve_range=True)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            shape = predictor(gray, face_rect)
            shape = face_utils.shape_to_np(shape)

            eye_img_l, eye_rect_l = crop_eye(cv2.cvtColor(
                frame, cv2.COLOR_BGR2RGB),
                                             eye_points=shape[36:42])
            eye_img_r, eye_rect_r = crop_eye(cv2.cvtColor(
                frame, cv2.COLOR_BGR2RGB),
                                             eye_points=shape[42:48])

            LEFT_EYE = transform.resize(eye_img_l, output_shape=img_size)
            RIGHT_EYE = transform.resize(eye_img_r, output_shape=img_size)

            LEFT_EYE = LEFT_EYE.reshape(1, *img_size, 3)
            RIGHT_EYE = RIGHT_EYE.reshape(1, *img_size, 3)
            FACE = FACE.reshape(1, *img_size, 3)
            FACE_MASK = FACE_MASK[:, :, :1]

            final['train_eye_left'] = np.append(final['train_eye_left'],
                                                LEFT_EYE)
            final['train_eye_right'] = np.append(final['train_eye_right'],
                                                 RIGHT_EYE)
            final['train_face'] = np.append(final['train_face'], FACE)
            final['train_face_mask'] = np.append(final['train_face_mask'],
                                                 FACE_MASK)
            final['train_y'] = np.append(final['train_y'], train_labels[file])
            print('Done')
    print('Done')
    np.savez(numpy_file, **final)  # save with named keys so the np.load branch above can read them back
Example #16
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')

                # start_time = time.time()

                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                #     min_score_thresh: face detection threshold; larger values are stricter
                count1, dict1 = vis_util.visualize_boxes_and_labels_on_image_array(
                    #          image_np,
                    image,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    colors=colors,
                    min_score_thresh=0.6,
                    use_normalized_coordinates=True,
                    line_thickness=4)
                for key, value in dict1.items():
                    ix.append(dict1[key][0][3])
                    iy.append(dict1[key][0][0])
                    iw.append(dict1[key][0][1] - dict1[key][0][3])
                    ih.append(dict1[key][0][2] - dict1[key][0][0])

                for x, y, w, h in zip(ix, iy, iw, ih):

                    # tracker = cv2.Tracker_create("KCF")
                    tracker1 = kcftracker.KCFTracker(
Example #17
def videoFaceDet():
    global IMAGES_PATH
    global FrameId
    IMAGES_PATH = ".//Images//{:06d}.jpg"
    FrameId = 0
    import sys
    if len(sys.argv) != 1:
        print ("usage:%s (cameraID | filename) Detect faces\
 in the video example:%s 0"%(sys.argv[0], sys.argv[0]))
        exit(1)


    try:
    	camID = 0
    except:
    	camID = 0

    tDetector = TensoflowFaceDector(PATH_TO_CKPT)

    cap = cv2.VideoCapture(camID)
    windowNotSet = True
    while True:
        ret, image = cap.read()
        if ret == 0:
            break

        [h, w] = image.shape[:2]
        # print (h, w)
        image = cv2.flip(image, 1)

        (boxes, scores, classes, num_detections) = tDetector.run(image)

        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)

        # (ymin,xmin,ymax,xmax)
        max_boxes_to_draw = 20
        min_score_thresh = .7

        if not max_boxes_to_draw:
            max_boxes_to_draw = boxes.shape[0]
        for i in range(min(max_boxes_to_draw, boxes.shape[0])):
            if scores is None or scores.all() > min_score_thresh:
                box = tuple(boxes[i].tolist())
                # ymin, xmin, ymax, xmax = box
                ymin = int(box[0][0] * h)
                xmin = int(box[0][1] * w)
                ymax = int(box[0][2] * h)
                xmax = int(box[0][3] * w)
        # print(xmin)
        imageId = image[xmin - 80:xmin + 200, ymin - 30:ymin + 270]



        # getImage = image()
        if windowNotSet is True:
            # cv2.namedWindow("tensorflow based (%d, %d)" % (w, h), cv2.WINDOW_NORMAL)
            windowNotSet = False


        # cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)
        cv2.imwrite(IMAGES_PATH.format(FrameId), imageId)
        FrameId += 1
        if FrameId == 74:
            FrameId = 0
        k = cv2.waitKey(1) & 0xff
        if k == ord('q') or k == 27:
            break

    cap.release()
Example #18
def face_detection():

    # Load Tensorflow model
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each detected object.
    # Score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')

    # Actual detection.
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Start video stream
    cap = WebcamVideoStream(0).start()
    fps = FPS().start()

    while True:

        frame = cap.read()

        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        expanded_frame = np.expand_dims(frame, axis=0)
        (boxes, scores, classes,
         num_c) = sess.run([
             detection_boxes, detection_scores, detection_classes,
             num_detections
         ],
                           feed_dict={image_tensor: expanded_frame})

        # Visualization of the detection
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=2,
            min_score_thresh=0.40)

        cv2.imshow('Detection', frame)
        fps.update()

        if cv2.waitKey(1) == ord('q'):
            fps.stop()
            break

    print("Fps: {:.2f}".format(fps.fps()))
    fps.update()
    cap.stop()
    cv2.destroyAllWindows()
Example #19
def detect_face_by_video():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    sys.path.append("..")

    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    PATH_TO_CKPT = './model/frozen_inference_graph_face.pb'

    # List of the strings that is used to add correct label for each box.
    PATH_TO_LABELS = './protos/face_label_map.pbtxt'

    NUM_CLASSES = 2

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    cap = cv2.VideoCapture("./media/test.mp4")
    out = None

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(graph=detection_graph, config=config) as sess:
            frame_num = 1490
            while frame_num:
                frame_num -= 1
                ret, image = cap.read()
                if ret == 0:
                    break

                if out is None:
                    [h, w] = image.shape[:2]
                    out = cv2.VideoWriter("./media/test_out.avi", 0, 25.0,
                                          (w, h))

                image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # the array based representation of the image will be used later in order to prepare the
                # result image with boxes and labels on it.
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                # Each score represents the level of confidence for each detected object.
                # Score is shown on the result image, together with the class label.
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                start_time = time.time()
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                elapsed_time = time.time() - start_time
                print('inference time cost: {}'.format(elapsed_time))
                # print(boxes.shape, boxes)
                # print(scores.shape, scores)
                # print(classes.shape, classes)
                # print(num_detections)
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    #            image_np,
                    image,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=4)
                out.write(image)

            cap.release()
            out.release()
Example #20
def run_yolo(out_filename):
    out = None

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(graph=detection_graph, config=config) as sess:
            frame_num = 1490
            while frame_num:
                frame_num -= 1
                ret, image = cap.read()
                if ret == 0:
                    break

                if out is None:
                    [h, w] = image.shape[:2]
                    out = cv2.VideoWriter(out_filename, 0, 25.0, (w, h))

                # if int(cap.get(cv2.CAP_PROP_POS_FRAMES)) % 10 == 0:
                image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                # result image with boxes and labels on it.
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                start_time = time.time()
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                elapsed_time = time.time() - start_time
                sys.stdout.write('Inference Time Cost: %s\r' %
                                 (format(elapsed_time)))
                sys.stdout.flush()
                # Do a gamma correction for darker images
                # pass the gamma corrected image frame
                #TODO Revisit this -- Face Rec not working on gamma correction
                # gamma = 1.5
                # adjusted = adjust_gamma(image, gamma=gamma)
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    known_face_encodings,
                    known_face_names,
                    use_normalized_coordinates=True,
                    line_thickness=4)
                out.write(image)

            cap.release()
            out.release()
Example #21
    windowNotSet = True
    while True:
        ret, image = cap.read()
        if ret == 0:
            break

        [h, w] = image.shape[:2]
        print(h, w)
        image = cv2.flip(image, 1)

        (boxes, scores, classes, num_detections) = tDetector.run(image)

        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)

        if windowNotSet is True:
            cv2.namedWindow("tensorflow based (%d, %d)" % (w, h), cv2.WINDOW_NORMAL)
            windowNotSet = False

        cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)
        k = cv2.waitKey(1) & 0xff
        if k == ord('q') or k == 27:
            break

    cap.release()
Example #22
def main():
    # Globals
    scale_factor = 5
    mog2 = cv2.cuda.createBackgroundSubtractorMOG2(
        120, 5, False)  # TODO: Optimize first two arguments
    detector = TensoflowFaceDector(PATH_TO_CKPT)

    cpus = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=cpus - 1)

    cameras = [
        (
            "Main",
            'filesrc location="Main_Entrance___Entry_Camera_01_20201123084117.mp4" ! qtdemux ! h264parse ! nvh264dec ! appsink',
        ),
        (
            "Side",
            'filesrc location="Side_Entrance___Entry_Camera_01_20201123124307.mp4" ! qtdemux ! h264parse ! nvh264dec ! appsink',
        ),
        (
            "Back",
            'filesrc location="Back_Entrance___Entry_Camera_01_20201123091410.mp4" ! qtdemux ! h264parse ! nvh264dec ! appsink',
        ),
        # ('Main',
        # 'rtspsrc location="rtsp://*****:*****@192.168.1.207:554//h264Preview_01_main" ! rtph264depay ! h264parse ! nvh264dec ! appsink'),
        # ('Side',
        # 'rtspsrc location="rtsp://*****:*****@192.168.1.200:554//h264Preview_01_main" ! rtph264depay ! h264parse ! nvh264dec ! appsink'),
        # ('Back',
        # 'rtspsrc location="rtsp://*****:*****@192.168.1.203:554//h264Preview_01_main" ! rtph264depay ! h264parse ! nvh264dec ! appsink')
    ]

    capture_devices = []

    queue = deque()
    threads = []

    for camera_name, camera_stream in cameras:
        capture_device = cv2.VideoCapture(camera_stream, cv2.CAP_GSTREAMER)

        if capture_device.isOpened():
            thread = Thread(target=get_frame,
                            args=(capture_device, queue, camera_name))
            thread.daemon = True
            thread.start()
            threads.append(thread)
            # keep a handle so every opened device can be released at the end
            capture_devices.append(capture_device)
            cv2.namedWindow(camera_name, cv2.WINDOW_NORMAL)
            cv2.resizeWindow(camera_name, 800, 600)

    while True:
        if not len(queue):
            break  # switch to continue if using rtsp streams
        print(len(queue))
        camera_name, frame = queue.popleft()
        # loop_start = time.time()

        # GStreamer outputs planar YUV420_NV12
        gray = frame[0:1920, 0:2560]

        # CPU: Planar YUV420_NV12 makes it difficult to crop before converting, so we will convert the whole frame
        rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_NV12)

        # GPU: resize and background subtraction
        frame_gpu = cv2.cuda_GpuMat()
        frame_gpu.upload(gray)
        frame_gpu = cv2.cuda.resize(
            frame_gpu,
            (0, 0),
            fx=1.0 / scale_factor,
            fy=1.0 / scale_factor,
            interpolation=cv2.INTER_CUBIC,
        )
        frame_gpu = mog2.apply(frame_gpu, 0,
                               None)  # TODO: Optimize second argument
        frame_gpu = frame_gpu.download()

        # CPU, but no cuda version exists for this function
        contours, hierarchy = cv2.findContours(
            frame_gpu, cv2.RETR_TREE,
            cv2.CHAIN_APPROX_SIMPLE)  # TODO: Optimize last two args

        if len(contours):
            contour = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(contour)
            if 500 < area:
                (x, y, w, h) = cv2.boundingRect(contour)
                x *= scale_factor
                y *= scale_factor
                w *= scale_factor
                h *= scale_factor

                roi = rgb[y:y + h, x:x + w]
                cv2.rectangle(rgb, (x, y), (x + w, y + h), (0, 255, 0), 3)

                with HiddenPrints():
                    (boxes, scores, classes,
                     num_detections) = detector.run(roi)

                vis_util.visualize_boxes_and_labels_on_image_array(
                    roi,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=4,
                )

        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
        print(camera_name)
        cv2.imshow(camera_name, bgr)
        # print('fps', 1 / (time.time() - loop_start))

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    for capture_device in capture_devices:
        capture_device.release()

    for thread in threads:
        thread.join(timeout=1)
Example #23
def main():
    # open the camera,load the cnn model
    camera = cv2.VideoCapture(0)
    model = load_model('blinkModel.hdf5')

    # blinks is the number of total blinks ,close_counter
    # the counter for consecutive close predictions
    # and mem_counter the counter of the previous loop
    close_counter = blinks = mem_counter = 0
    state = ''
    tDetector = TensoflowFaceDector(PATH_TO_CKPT)
    while True:

        ret, frame = camera.read()

        [h, w] = frame.shape[:2]
        frame = cv2.flip(frame, 1)
        (boxes, scores, classes, num_detections) = tDetector.run(frame)

        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=1,
            min_score_thresh=0.4,
            line_thickness=4)

        # detect eyes
        if scores[0][0] > 0.2:
            eyes = cropEyes(frame, boxes, [h, w])
            if eyes is None:
                continue
            else:
                left_eye, right_eye = eyes

            # average the predictions of the two eyes
            prediction = (model.predict(cnnPreprocess(left_eye)) +
                          model.predict(cnnPreprocess(right_eye))) / 2.0

            # blinks
            # if the eyes are open reset the counter for close eyes

            if prediction > 0.2:
                state = 'open'
                close_counter = 0
            else:
                state = 'close'
                close_counter += 1

            # if the eyes are open and were previously closed
            # for a sufficient number of frames, then increment
            # the total blinks
            if state == 'open' and mem_counter > 1:
                blinks += 1
            # keep the counter for the next loop
            mem_counter = close_counter

            # draw the total number of blinks on the frame along with
            # the state for the frame
            cv2.putText(frame, "Blinks: {}".format(blinks), (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            cv2.putText(frame, "State: {}".format(state), (300, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # show the frame
        cv2.imshow('blinks counter', frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord('q'):
            break

    # do a little clean up
    cv2.destroyAllWindows()
    del (camera)
Example #24

if __name__ == "__main__":
    import sys

    tDetector = TensorFaceDetector()
    while True:
        ret, image = cap.read()
        if ret == 0:
            break
        [h, w] = image.shape[:2]
        print(h, w)
        # image = cv2.flip(image, 1)

        (boxes, scores, classes, num_detections) = tDetector.run(image)

        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)

        cv2.imshow("tensorflow based (%d, %d)" % (w, h), image)

        k = cv2.waitKey(1) & 0xff
        if k == ord('q') or k == 27:
            break
    #(boxes, scores, classes, num_detections) = sess.run(
    #    [boxes, scores, classes, num_detections],
    #    feed_dict={image_tensor: image_np_expanded})
    elapsed_time = time.time() - start_time
    print('batched inference time cost: {}'.format(elapsed_time))
    #print(boxes.shape, boxes)
    #print(scores.shape,scores)
    #print(classes.shape,classes)
    #print(num_detections)
    # Visualization of the results of a detection.
    for i, pred in enumerate(preds):
        vis_util.visualize_boxes_and_labels_on_image_array(
    #          image_np,
            frames[i],
            np.squeeze(pred['boxes']),
            np.squeeze(pred['classes']).astype(np.int32),
            np.squeeze(pred['scores']),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4)
        out.write(frames[i])

# now the leftovers for max_frames % BATCH_SIZE
for frame in range(MAX_FRAMES - (MAX_FRAMES % BATCH_SIZE), MAX_FRAMES):
    ret, image = cap.read()
    if ret == 0:
        break

    if out is None:
        [h, w] = image.shape[:2]
        out = cv2.VideoWriter("./media/test_out.avi", 0, 25.0, (w, h))