def worker(input_q, output_q, cap_params, frame_processed):
    """Annotate frames from ``input_q`` with hand boxes and forward them.

    Loads the frozen detection graph, then loops forever: each frame taken
    from ``input_q`` is run through the hand detector, has its bounding
    boxes drawn in place and is put on ``output_q``. A ``None`` frame is
    forwarded unchanged.

    Args:
        input_q: queue-like object whose ``get()`` yields frames (or None).
        output_q: queue-like object whose ``put()`` receives the frames.
        cap_params: mapping providing ``'num_hands_detect'``,
            ``'score_thresh'``, ``'im_width'`` and ``'im_height'``.
        frame_processed: starting value of the processed-frame counter.
            NOTE(review): the counter is only rebound locally, so the
            caller never observes the increments if a plain int is passed
            -- confirm whether a shared counter was intended.

    The loop has no exit condition of its own; the sessions are released
    in a ``finally`` clause so they are closed when the loop is left by an
    exception (e.g. KeyboardInterrupt).
    """
    print(">> Loading frozen model for worker.")
    # The loader hands back its own session; the original code discarded it
    # in favour of a fresh one, so keep a handle to be able to close it too.
    detection_graph, loader_sess = detector_utils.load_inference_graph()
    sess = tf.compat.v1.Session(graph=detection_graph)
    try:
        while True:
            frame = input_q.get()
            if frame is None:
                output_q.put(frame)
                continue

            # Boxes contain coordinates for detected hands.
            # Scores contains confidence levels.
            # If len(boxes) > 1, at least one hand is detected.
            # You can change the score_thresh value as desired.
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # Draws bounding boxes
            detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'],
                cap_params["score_thresh"],
                scores,
                boxes,
                cap_params['im_width'],
                cap_params['im_height'],
                frame)

            # Adds frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
    finally:
        # Previously `sess.close()` sat after the infinite loop and could
        # never run; closing here guarantees the sessions are released.
        sess.close()
        if loader_sess is not None and loader_sess is not sess:
            loader_sess.close()
def grab_hands(filename):
    """Detect hands in an image file and save a padded crop of them.

    The image is read with OpenCV, converted to RGB for the detector, the
    detected boxes are drawn on it and the annotated image is shown in the
    'Single-Threaded Detection' window. Every detected box is then cropped
    (enlarged by ``ADJUSTMENT`` pixels on each side) and written to
    ``converted/<basename of filename>``.

    Args:
        filename: path of the image to process; the part after the last
            ``/`` is reused as the output file name.

    Returns:
        The output path ``converted/<name>`` when at least one box was
        returned by ``detector_utils.draw_box_on_image``, otherwise None.

    NOTE(review): the source had lost its indentation; this assumes
    ``return fname`` follows the loop, so with several detections each crop
    overwrites the previous one and the last one remains on disk -- confirm.
    """
    os.makedirs('converted', exist_ok=True)
    cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL)

    img = cv2.imread(filename)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    boxes, scores = detector_utils.detect_objects(img, detection_graph, sess)

    num_hands_detect = 2
    im_width, im_height = (640, 360)
    points = detector_utils.draw_box_on_image(
        num_hands_detect, 0.27, scores, boxes, im_width, im_height, img)

    # Convert back to OpenCV's BGR order exactly once. The original converted
    # inside the crop loop, which swapped the channels back and forth on every
    # additional detection.
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imshow('Single-Threaded Detection', bgr)

    filename = filename.split("/")[-1]
    print(filename)
    print(len(points))
    if len(points) == 0:
        return None

    fname = f'converted/{filename}'
    for p1, p2 in points:
        # Clamp the padded top-left corner at 0: a negative slice start would
        # be interpreted relative to the far edge and yield a wrong/empty crop.
        top = max(p1[1] - ADJUSTMENT, 0)
        left = max(p1[0] - ADJUSTMENT, 0)
        # p1 + (p2 - p1) + ADJUSTMENT simplifies to p2 + ADJUSTMENT; slice
        # ends past the image border are truncated by the slicing itself.
        bottom = p2[1] + ADJUSTMENT
        right = p2[0] + ADJUSTMENT
        cropped = bgr[top:bottom, left:right]
        print(fname)
        cv2.imwrite(fname, cropped)
    return fname
# main(argv=None) -- step-by-step pill-intake check on a recorded video.
#
# NOTE(review): this copy of the function has lost its line breaks and
# indentation and at least two expressions are truncated, so the code is left
# exactly as found; only this commentary is added.
#
# What the visible code does:
#   * Opens FLAGS.test_data_path with cv2.VideoCapture, prints frame count,
#     FPS and duration, and derives `threshold = fps / gap * 10` with gap = 7
#     (only frames where `frame_count % gap == 0` are processed).
#     `shown_time_threshold` is half of `threshold`.
#   * For each processed frame: resizes to width 450, runs
#     face_recognition.face_locations; with exactly one face it encodes it,
#     looks the name up via utils.recognize_face / known_names, draws the
#     name and a face box, and `nb_fr` is incremented behind a name check.
#   * Inside a try block: mouth_detection.mouth_detection_video gives the
#     mouth box, utils.color_detection_white is run on the lower part of that
#     box (offset d = int(0.35 * h)); a non-zero `pw` marks `pill_inside` and
#     `start_tracking`. The mouth counts as closed when
#     h < 0.2 * face_height.
#   * detector_utils.detect_objects / draw_box_on_image provide
#     `hands_detected`, which the rest of the code compares with 0 and 1.
#   * While `over == 0` the on-screen instructions advance through: show the
#     pill (counts `shown_time`), put it on the tongue, keep the mouth closed
#     while `timing` counts up to `threshold`, then open the mouth again;
#     `finished`, `over`, `pill_removed` and `tolerance` record the outcome.
#   * After the loop it prints whether nb_fr / t exceeds 0.6 ("right
#     person"), whether detection finished, whether the pill was removed,
#     the processing time and the `suspicious` flag.
#
# Names read but not defined here (must come from the enclosing module):
# FLAGS, FPS, imutils, face_recognition, utils, known_faces_encoding,
# known_names, mouth_detection, detector, predictor, detector_utils,
# detection_graph, sess1, num_hands_detect, score_thresh. `finished` is a
# global that is read (`finished==0`, `finished == 1`) and only ever assigned
# 1 in this function -- presumably initialised at module level; verify.
#
# Review findings (not changed here):
#   * `ret, frame = if ret is False:` -- the right-hand side is missing in
#     this copy (presumably a frame read from `vs` -- TODO confirm).
#   * `if (name == nb_fr += 1` -- the value `name` is compared with, and the
#     end of the condition, are missing in this copy.
#   * `if pill_inside==0 & start_tracking==1:` -- `&` binds tighter than `==`,
#     so this is the chained comparison
#     `pill_inside == (0 & start_tracking) == 1`, which can never be true for
#     integer flags; `suspicious` is therefore never set to 1 by this test.
#     `and` was almost certainly intended.
#   * `if timer2 == 0 & time_up== 0:` -- parses as
#     `timer2 == (0 & time_up) == 0`, i.e. effectively `timer2 == 0`; the
#     `time_up` part has no influence.
#   * The bare `except: pass` around mouth/pill detection hides every error,
#     including programming errors, raised inside that try block.
#   * `fps` first holds the video frame rate and is later rebound to
#     `FPS().start()`; `threshold` is computed before the rebinding.
#   * face_recognition.face_locations(orig) is evaluated up to three times
#     for the same frame.
def main(argv=None): global im process_start = time.time() #location of the instructions font_location1 = 0 font_location2 = 25 font_size = 0.6 font_thickness = 2 font_color = (255,255,255) timing = 0 timer2 = 0 #whether the ten seconds countdown finished time_up = 0 #count how many frames have been processed t = 1 #Count in how many frames we detect the right person nb_fr = 1 vs = cv2.VideoCapture(FLAGS.test_data_path) length = int(vs.get(cv2.CAP_PROP_FRAME_COUNT)) print("frames of the video:" ,length) fps = vs.get(cv2.CAP_PROP_FPS) print("FPS of the video: ",fps) video_duration = round(length/fps,2) print("duration of the video:",video_duration) #process every certain mount(gap) of frame gap = 7 #ten seconds countdown threshold = fps / gap *10 fps = FPS().start() frame_count = 0 #initialize variables over=0 #detect if there's any suspicious action, e.g., remove the pill suspicious = 0 #whether the ten senconds countdown started start_counting = 0 pill_removed = 0 #patient needs to show the pill number for a certain amount of time(shown_time_threshold) shown_time_threshold = threshold / 2 shown_time = 0 tolerance = 0 start_tracking = 0 while True: ret, frame = if ret is False: break frame_count = frame_count + 1 #process every certain amount(gap) of frame if frame_count % gap == 0: if (over==0) : t = t + 1 frame = imutils.resize(frame, width =450) im = frame[:, :, ::-1] orig = frame.copy() image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) pill_inside = 0 mouth_close = 0 # find all the faces and make sure there can not be more than one person if len(face_recognition.face_locations(orig)) == 0: pass elif len(face_recognition.face_locations(orig)) > 1: print("WARNING: two person appear!") pass else: face_location = face_recognition.face_locations(orig) unknown_face_encoding = face_recognition.face_encodings(orig, face_location)[0] index = utils.recognize_face(unknown_face_encoding, known_faces_encoding) name = known_names[index] if (name == nb_fr += 1 
cv2.putText(im[:, :, ::-1], name, (font_location1, font_location2), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) top, right, bottom, left = face_location[0] face_height = bottom - top # Draw a box around the face cv2.rectangle(im[:, :, ::-1], (left, top), (right, bottom), (0, 0, 255)) try: (x, y, w, h) = mouth_detection.mouth_detection_video(orig, detector, predictor) cv2.rectangle(im[:, :, ::-1], (x, y), (x + w, y + h), (0, 0, 255)) d = int(0.35 * h) #get the mouth area roi = orig[y + d:y + h, x:x + w] #detect if there's pill inside the mouth and get the pill location by white color detection in the mouth area (px, py, pw, ph) = utils.color_detection_white(roi) # pill detected if (pw!=0): # Draw a box around the pill cv2.rectangle(im[:, :, ::-1], (x + px, y + py+ d ), (x + px + pw, y + py + ph +d), (0, 255, 0), font_thickness) pill_inside = 1 start_tracking = 1 else: pill_inside = 0 #detect whether the mouth is close if h < 0.2 * face_height: mouth_close = 1 else: mouth_close = 0 if pill_inside==0 & start_tracking==1: suspicious = 1 except: pass #detect hands and get the scores of the detected hands boxes1, scores1 = detector_utils.detect_objects(image_np, detection_graph, sess1) h, w = im.shape[:2] # draw a box around the hands whose score is greater than the score_thresh hands_detected = detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores1, boxes1, w, h, im[:, :, ::-1]) if (over ==0): #step one & two (when the ten seconds count down didn't start or finish): if timer2 == 0 & time_up== 0: #step one:show the number for certain amount of frames if shown_time< shown_time_threshold : cv2.putText(im[:, :, ::-1], "Please put the pill in front of your mouth,", (font_location1, font_location2 + 25), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.putText(im[:, :, ::-1], "with the number clearly visible to the camera.", (font_location1, font_location2 + 50), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, 
font_thickness) #when the pill is hold in front of mouth, start counting if (pill_inside==1)&(hands_detected==1): shown_time = shown_time + 1 #Step two:after the number is shown for a certain amount of time else: cv2.putText(im[:, :, ::-1], "Please put the pill on your tongue,", (font_location1, font_location2 + 75), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.putText(im[:, :, ::-1], "then remove your hands.", (font_location1, font_location2 + 100), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) #Step three:the pill is put inside the mouth(pill is inside the mouth, no hands detected) if (pill_inside == 1) & (hands_detected==0) & (time_up == 0) : cv2.putText(im[:, :, ::-1], "Please keep the pill on your tongue for 10 seconds", (font_location1, font_location2+125), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.putText(im[:, :, ::-1], "with your mouth closed.", (font_location1, font_location2 + 150), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) #pill is inside the mouth, ten seconds countdown can be strated when the mouth is close timer2 = 1 if timer2 == 1 : #if there's hand in the frame during the ten seconds countdown, we assume the patient took the pill out of the mouth if (hands_detected == 1)&(start_counting==1): cv2.putText(im[:, :, ::-1], "Please don't remove the pill!", (font_location1, font_location2+175), cv2.FONT_HERSHEY_SIMPLEX, font_size, (0,0,0), font_thickness) #reset the ten seconds countdown timer2 = 0 timing = 0 pill_removed = 1 else: if mouth_close==1: cv2.putText(im[:, :, ::-1], "Starting the 10 seconds countdown...", (font_location1, font_location2+200), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) timing = timing + 1 start_counting = 1 #Step four:when the ten seconds countdown is over, patient should open the mouth and show the pill is still on the tongue to make sure he/she didn't took out the pill if timing > threshold: cv2.putText(im[:, :, 
::-1], "Please open your mouth and show", (font_location1, font_location2+225), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.putText(im[:, :, ::-1], "the pill is still on your tongue.", (font_location1, font_location2 + 250), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) time_up = 1 else: time_up = 0 if time_up == 1: # pill is detected inside the mouth if (mouth_close == 0)&(pill_inside==1): cv2.putText(im[:, :, ::-1], "Thank you. You accomplished all the steps.", (font_location1, font_location2+275),cv2.FONT_HERSHEY_SIMPLEX, font_size,font_color, font_thickness) cv2.putText(im[:, :, ::-1], "In two minutes we will verify that the number was correct.", (font_location1, font_location2 + 300), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.putText(im[:, :, ::-1], "was correct.", (font_location1, font_location2 + 325), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) global finished #the patient followed all the instruction finished = 1 #detection is finished over = 1 #if we can't detect pill in the mouth, it may because the patient is opening his/her mouth and the pill is blocked if(mouth_close==0)&(pill_inside==0): tolerance = tolerance + 1 #if hands show up before the pill is detected, we assume the patient took out the pill if (finished==0)&(hands_detected==1): #the patient didn't follow all the instructions finished = 0 #detection is finished over = 1 elif (tolerance > 3): over = 1 else: cv2.putText(im[:, :, ::-1], "Thank you. 
You accomplished all the steps.", (font_location1, font_location2 + 275), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.putText(im[:, :, ::-1], "In two minutes we will verify that the number", (font_location1, font_location2 + 300), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.putText(im[:, :, ::-1], "was correct.", (font_location1, font_location2 + 325), cv2.FONT_HERSHEY_SIMPLEX, font_size, font_color, font_thickness) cv2.imshow("im", im[:, :, ::-1]) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() break else: pass cv2.waitKey(0) face_detection_result = nb_fr/t #calculate the processing time process_time = time.time() - process_start if face_detection_result > 0.6: right_person = 1 print("It's the right person") sys.stdout.flush() else: right_person = 0 print("It's not the right person") sys.stdout.flush() if finished == 1: print("Detection finished") sys.stdout.flush() else: print("Detection is not finished") sys.stdout.flush() if pill_removed==1: print("pill has been removed") sys.stdout.flush() print("process time:",process_time) print("suspicious",suspicious) #print("Video length:",video_duration) #print(face_detection_result) fps.stop() vs.release()
import tensorflow as tf
import detector_utils
import cv2

# Single-image demo: detect hands in 'hands3.jpeg', draw their boxes and
# display the result at one third of the original size.

# Frozen detection graph plus the session used to run it.
detection_graph, sess = detector_utils.load_inference_graph()

img = cv2.imread('hands3.jpeg')
boxes, scores = detector_utils.detect_objects(img, detection_graph, sess)

# Same hand limit and score threshold for the centre lookup and the drawing.
max_hands, min_score = 2, 0.05

x, y = detector_utils.get_center(
    max_hands, min_score, scores, boxes, img.shape[1], img.shape[0], img)
detector_utils.draw_box_on_image(
    max_hands, min_score, scores, boxes, img.shape[1], img.shape[0], img)

# cv2.resize takes the target size as (width, height).
third_size = (int(img.shape[1] / 3), int(img.shape[0] / 3))
img = cv2.resize(img, third_size)

cv2.imshow('', img)
# Block until a key is pressed so the window stays open.
cv2.waitKey(0)
# main(argv=None) -- verify the number/letter printed on a pill in a video.
#
# NOTE(review): this copy of the function has lost its line breaks and
# indentation and several expressions are truncated, so the code is left
# exactly as found; only this commentary is added.
#
# What the visible code does:
#   * Sets CUDA_VISIBLE_DEVICES from `gpu_list`, builds the text-detection
#     network with model.model(input_images, is_training=False) and restores
#     exponential-moving-average weights from the checkpoint found under
#     `checkpoint_path`.
#   * Reads frames from cv2.VideoCapture(FLAGS.test_data_path). Frames are
#     resized to height 200 until `large` is set, afterwards to height 1500.
#   * Per frame: hand detection (detector_utils), face detection
#     (face_recognition; only the single-face case is processed), mouth
#     detection and white-colour pill detection in the lower mouth area.
#     A non-zero `pw` sets `large = 1`.
#   * While `number_correct == 0` and `large == 1`, the face region
#     `roi_text` is resized via utils.resize_image, passed through the
#     network, boxes are obtained with utils.detect and each box is read with
#     utils.recognize_to_text; `number_correct` becomes 1 when the text
#     equals FLAGS.expected.
#   * Stops when more than 180 seconds have elapsed since `duration_start`
#     or when the number has been confirmed, then prints one of three
#     verdict messages and the processing time.
#
# Names read but not defined here (must come from the enclosing module):
# gpu_list, checkpoint_path, model, FLAGS, FPS, imutils, detector_utils,
# detection_graph, sess1, num_hands_detect, score_thresh, face_recognition,
# mouth_detection, detector, predictor, utils, font_thickness, np.
# The function uses the TensorFlow 1.x graph API directly (tf.placeholder,
# tf.Session, tf.train.Saver).
#
# Review findings (not changed here):
#   * `frame = frame = frame[1]` -- the expression producing the frame is
#     missing in this copy (presumably a read from `vs` -- TODO confirm).
#   * `score, geometry =[f_score, f_geometry], feed_dict={...})` -- the call
#     that should wrap these arguments is missing; the closing parenthesis
#     is unbalanced as it stands.
#   * The comment ending "(the function was written in" is truncated and
#     fused with the following `text = utils.recognize_to_text(...)`.
#   * `if hands_detected==0: pass` has no effect; the visible code does not
#     skip the frame as the preceding comment suggests.
#   * Both `except: pass` clauses are bare and hide every error raised in
#     the detection and recognition code.
#   * `time1 = time.time() - time1` turns the start timestamp into a
#     duration; if it runs on every loop iteration the value is only
#     meaningful the first time -- verify against the intended nesting.
#   * After `cv2.waitKey(25) & 0xFF == ord('q')` only
#     cv2.destroyAllWindows() is visible; no `break` can be seen here.
#   * `text` is only assigned inside the recognition loop, but two of the
#     final messages print it; `not_verified` is reset to 1 at the top of
#     every iteration, so the final verdict reflects the last frame only.
#   * face_recognition.face_locations(orig) is evaluated up to three times
#     for the same frame.
def main(argv=None): os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list #whether we have detcted the same number as the input number_correct = 0 time1 = time.time() with tf.get_default_graph().as_default(): input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) f_score, f_geometry = model.model(input_images, is_training=False) #use exonential moving average variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step) saver = tf.train.Saver(variable_averages.variables_to_restore()) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: #load weights,biases,gradients and other variables ckpt_state = tf.train.get_checkpoint_state(checkpoint_path) #load the sece text detection model model_path = os.path.join(checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) saver.restore(sess, model_path) print("processing starts") duration_start = time.time() #read the frame of the input video vs = cv2.VideoCapture(FLAGS.test_data_path) fps = FPS().start() frame_count = 0 large = 0 while True: #number is not verified not_verified = 1 processing_time = time.time()-duration_start print(processing_time) frame_count = frame_count + 1 frame = frame = frame[1] if frame is None: break pill_ready = 0 # We start the number recognition when the pill is put in front of the mouth; Before that, we downsize the frame to improve the efficiency if large == 1: frame = imutils.resize(frame, height = 1500) #if there's hand holding the pill in fornt of the mouth ,we enlarge the frame and do scene text detection and reocgnition else: frame = imutils.resize(frame, height=200) im = frame[:, :, ::-1] orig = frame.copy() image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) #detect hands and get the scores of the detected hands boxes1, scores1 = detector_utils.detect_objects(image_np, detection_graph, sess1) h, w = 
im.shape[:2] #draw a box around the hands whose score is greater than the score_thresh hands_detected = detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores1, boxes1, w, h, im[:, :, ::-1]) # if there's no hand holding the pill, we do not process the fraem if hands_detected==0: pass if len(face_recognition.face_locations(orig)) == 0: pass elif len(face_recognition.face_locations(orig)) > 1: pass else: #face detection face_location = face_recognition.face_locations(orig) top, right, bottom, left = face_location[0] cv2.rectangle(im[:, :, ::-1], (left, top), (right, bottom), (0, 0, 255)) #we do number and letter detection inside the face area roi_text = orig[top:bottom, left:right] try: #mouth detection (x, y, w, h) = mouth_detection.mouth_detection_video(orig, detector, predictor) cv2.rectangle(im[:, :, ::-1], (x, y), (x + w, y + h), (0, 0, 255)) d = int(0.35 * h) roi = orig[y + d:y + h, x:x + w] global px, py, pw, ph #pill detection inside the mouth (px, py, pw, ph) = utils.color_detection_white(roi) #pill detected if (pw != 0): cv2.rectangle(im[:, :, ::-1], (x + px, y + py + d), (x + px + pw, y + py + ph + d), (0, 255, 0), font_thickness) large = 1 except: pass if (number_correct == 0)&(large == 1): try: start_time = time.time() #resize the area of number and letter detection im_resized, (ratio_h, ratio_w) = utils.resize_image(roi_text) timer = {'net': 0, 'restore': 0, 'nms': 0} start = time.time() score, geometry =[f_score, f_geometry], feed_dict={input_images: [im_resized]}) timer['net'] = time.time() - start #detect the number and letter, and get the boxes which contains the location of the number or letter boxes, timer = utils.detect(score_map=score, geo_map=geometry, timer=timer) #if the box is not none, resize the box for further recognition if boxes is not None: boxes = boxes[:, :8].reshape((-1, 4, 2)) boxes[:, :, 0] /= ratio_w boxes[:, :, 1] /= ratio_h if boxes is not None: for indBoxes, box in enumerate(boxes): #number recognition by 
morphology processing and tesseract(the function was written in text = utils.recognize_to_text(roi_text[:, :, ::-1], box) #if any number or letter has been detected, we set the not_berified to 0 if text is not None: not_verified = 0 #print("[recognize box({})] text: {}".format(indBoxes, text)) box = utils.sort_poly(box.astype(np.int32)) if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm( box[3] - box[0]) < 5: # strip small box continue cv2.putText(im[:, :, ::-1], text, (50, 250), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) if text == FLAGS.expected: number_correct = 1 except: pass time1 = time.time() - time1 time2 = time.time() - duration_start #if the time of number or letter recognition is greater than certain amount of time, stop processing if time2>180: break #if we detect the same number as the input, stop processing if (number_correct == 1): break #cv2.imshow("im", im[:, :, ::-1]) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() fps.stop() vs.release() #if we detect the same number or letter as the input if number_correct==1: print("Number read is", text, ", which corresponds correctly to the pill that was dispensed.") sys.stdout.flush() #if we didn't detect any number or letter elif (not_verified == 1): print("The number is not verified") sys.stdout.flush() #if we didn't detect the right number or letter else: print("Number read is",text,", which is different from the number on the pill. We will check this manually") sys.stdout.flush() print("video processing time",time2)