def main():
    if cv2gpu.is_cuda_compatible():
        cv2gpu.init_gpu_detector(cascade_file_gpu)
    else:
        cv2gpu.init_cpu_detector(cascade_file_cpu)
    image = cv2.imread(image_file, cv2.IMREAD_COLOR)
    dims = image.shape
    image = image.ravel()
    # Run once to initialize the GPU with its instructions (warm-up)
    faces = cv2gpu.find_faces(dims, image)
    start = datetime.datetime.now()
    faces = cv2gpu.find_faces(dims, image)
    image = cv2.imread(image_file)
    for (x, y, w, h) in faces:
        print('X: {x}, Y: {y}, width: {w}, height: {h}'.format(x=x, y=y, w=w, h=h))
        #cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
    #cv2.imshow('faces', image)
    #cv2.waitKey(0)
    print('Time processed (s): ' + str(datetime.datetime.now() - start))
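Note that this variant of find_faces takes the image shape and a flattened pixel buffer, while the other examples in this section pass a file path. The snippet also relies on module-level setup that is not shown; a minimal sketch of what that setup might look like (the cascade and image paths are placeholders, not from the original project):

import datetime

import cv2
import cv2gpu

# Assumed globals referenced by main(); adjust paths to your installation.
cascade_file_gpu = 'haarcascade_frontalface_default_cuda.xml'
cascade_file_cpu = 'haarcascade_frontalface_default.xml'
image_file = 'test.jpg'

if __name__ == '__main__':
    main()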
def mapper(self, _, line):
    race_predicted = {
        'Black-or-African-American': 0,
        'Asian': 0,
        'Asian-Middle-Eastern': 0,
        'Hispanic': 0,
        'Native-American': 0,
        'Other': 0,
        'Pacific-Islander': 0,
        'White': 0
    }
    frame_path = os.path.join(self.video_dir, line)
    # sys.stderr.write('frame path: {0}'.format(frame_path))
    frame = cv2.imread(frame_path)
    frame_bgr = None
    if len(frame.shape) == 3:
        if frame.shape[2] > 1:
            frame_bgr = frame
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    for (x, y, w, h) in cv2gpu.find_faces(frame_path):
        cutout = cv2.resize(frame[y:y+h, x:x+w], (256, 256))
        race_predicted_num, conf = self.recognizer.predict(cutout)
        race_predicted_str = self.face_labels_num[int(race_predicted_num)]
        race_predicted[race_predicted_str] += 1
        if self.write_results:
            # Explicit None check: truth-testing a NumPy array raises ValueError
            if frame_bgr is not None:
                cutout = cv2.resize(frame_bgr[y:y+h, x:x+w], (256, 256))
            cv2.putText(cutout, race_predicted_str, (0, 200),
                        cv2.FONT_HERSHEY_SIMPLEX, .7, (255, 255, 255), 1)
            cv2.imwrite(os.path.join(self.output_dir,
                                     '{0}_{1}_{2}_{3}.jpg'.format(x, y, w, h)), cutout)
    for race in race_predicted:
        yield race, race_predicted[race]
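A mapper like this is normally paired with a reducer that sums the per-frame counts. A minimal sketch, assuming an mrjob-style job (the class name and wiring are illustrative, not from the original project):

from mrjob.job import MRJob

class RaceCountJob(MRJob):
    def reducer(self, race, counts):
        # Sum the per-frame face counts emitted by the mapper for each label.
        yield race, sum(counts)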
def tryFaces(frame, type):
    # Raw string so the Windows-style backslashes are not treated as escapes
    cv2gpu.init_gpu_detector(r'cv2\data\haarcascades_cuda\haarcascade_frontalface_alt2.xml')
    framCpy = np.copy(frame)
    faces = cv2gpu.find_faces(image_file)
    for (x, y, w, h) in faces:
        center = (x + w // 2, y + h // 2)
        framCpy = cv2.ellipse(framCpy, center, (w // 2, h // 2), 0, 0, 360, (255, 0, 255), 4)
        # -- In each face, detect eyes
    cv2.imshow(str(type), framCpy)
    framCpy = np.zeros((2, 3))
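Re-initializing the detector on every call is wasteful; the other examples in this section initialize once at startup and then only call find_faces per frame. A minimal sketch of that pattern, assuming the same path-based API:

import cv2gpu

# One-time detector setup at module load.
cv2gpu.init_gpu_detector(r'cv2\data\haarcascades_cuda\haarcascade_frontalface_alt2.xml')

def detect_faces(image_path):
    # Per-frame work is now just the detection call; returns (x, y, w, h) tuples.
    return cv2gpu.find_faces(image_path)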
def main():
    if cv2gpu.is_cuda_compatible():
        cv2gpu.init_gpu_detector(cascade_file_gpu)
    else:
        cv2gpu.init_cpu_detector(cascade_file_cpu)
    faces = cv2gpu.find_faces(image_file)
    image = cv2.imread(image_file)
    for (x, y, w, h) in faces:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow('faces', image)
    cv2.waitKey(0)
def processFrame(self, dataURL, identity):
    framestart = time.time()
    start = time.time()
    head = "data:image/jpeg;base64,"
    assert dataURL.startswith(head)
    imgdata = base64.b64decode(dataURL[len(head):])
    # if args.verbose:
    #     print("Decode the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    imgF = StringIO.StringIO()
    imgF.write(imgdata)
    imgF.seek(0)
    img = Image.open(imgF)
    #buf = np.fliplr(np.asarray(img))
    buf = np.asarray(img)
    #rgbFrame_org = np.zeros((720, 1280, 3), dtype=np.uint8)
    #rgbFrame_org = np.zeros((450, 800, 3), dtype=np.uint8)
    #rgbFrame = np.zeros((360, 640, 3), dtype=np.uint8)
    #rgbFrame = np.zeros((216, 384, 3), dtype=np.uint8)  # frame length 44370, total 55fps, load 4ms, write 3ms, face 8.3ms
    #rgbFrame = np.zeros((234, 416, 3), dtype=np.uint8)  # frame length 51450, total 50fps, load 4.8ms, write 3.3ms, face 9.5ms
    rgbFrame = np.zeros((252, 448, 3), dtype=np.uint8)  # frame length 55282, total 48fps, load 5.4ms, write 3.6ms, face 9.6ms
    rgbFrame[:, :, 0] = buf[:, :, 2]
    rgbFrame[:, :, 1] = buf[:, :, 1]
    rgbFrame[:, :, 2] = buf[:, :, 0]
    # if args.verbose:
    #     print("load the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    scale_factor = 1
    inv_scale = 1.0 / scale_factor
    # rgbFrame = cv2.resize(rgbFrame_org, (0,0), fx=inv_scale, fy=inv_scale)
    # if args.verbose:
    #     print("resize the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    # rgbFrame_gray = cv2.cvtColor(rgbFrame, cv2.COLOR_BGR2GRAY)
    # if args.verbose:
    #     print("rgb to gray the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    #rgbFrame_gray = cv2.equalizeHist(rgbFrame_gray)
    #cv2.imwrite('zzz.png', rgbFrame_gray)
    # if not self.training:
    #     annotatedFrame_org = np.copy(buf)
    #     annotatedFrame = cv2.resize(annotatedFrame_org, (0,0), fx=0.5, fy=0.5)
    #     cv2.imshow('frame', rgbFrame)
    #     if cv2.waitKey(1) & 0xFF == ord('q'):
    #         return
    # if args.verbose:
    #     print("equalizeHist the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    identities = []
    NameAndBB = []
    bbs = []
    # bbs = align.getAllFaceBoundingBoxes(rgbFrame)
    #bb = align.getLargestFaceBoundingBox(rgbFrame_gray)
    # Try using OpenCV LBP face detection.
    # minNeighbors: parameter specifying how many neighbors each candidate rectangle should have to retain it.
    #faces = face_cascade.detectMultiScale(rgbFrame, 1.1, 2, cv2.CASCADE_SCALE_IMAGE, (20,20), (60,60))
    #print(len(faces))
    # convert faces to bbs
    #for (x, y, w, h) in faces:
    #    bbs.append(dlib.rectangle(x, y, x+w, y+h))
    cv2.imwrite('zzz.jpg', rgbFrame)
    if args.verbose:
        print("load and imwrite the image took {} seconds.".format(time.time() - start))
        start = time.time()
    #this_dir = os.path.dirname(os.path.realpath(__file__))
    faces = cv2gpu.find_faces('/root/openface/zzz.jpg')
    #print(len(faces))
    for (x, y, w, h) in faces:
        #print(x*scale_factor, y*scale_factor, w*scale_factor, h*scale_factor)
        bbs.append(dlib.rectangle(x * scale_factor, y * scale_factor,
                                  (x + w) * scale_factor, (y + h) * scale_factor))
    #bbs = [bb] if bb is not None else []
    if args.verbose:
        print("Face detection took {} seconds.".format(time.time() - start))
        start = time.time()
    # If training, only take the largest bounding box, since the person to be
    # learnt must stand in front.
    if self.training and len(faces) > 1:
        faces = max(faces, key=lambda rect: rect[2] * rect[3])
    # if len(bbs) > 1:
    #     print("Number of detected faces: ", len(bbs))
    identitylist = []
    replist = []
    prev_bbs_isused = [0] * (len(self.prev_bbs) + 1)
    dist_thd = 20.0  # in pixels; larger values match more easily (saving time) but mismatch more easily too
    prob = [1.0]
    nn_processed_bbs = 0
    for idxx, bb in enumerate(bbs):
        if len(bbs) > 1:
            print("BB:{} ->".format(idxx + 1))
        # landmarks = align.findLandmarks(rgbFrame, bb)
        # if args.verbose:
        #     print("Find landmarks~ took {} seconds.".format(time.time() - start))
        #     start = time.time()
        # Do tracking first: see if we can match with the previous frame's bbs.
        matchingresult = matching(bb, self.prev_bbs, prev_bbs_isused, dist_thd)
        if (not self.training and len(self.prev_bbs) > 0 and
                len(self.prev_identity) > 0 and len(self.prev_rep) > 0 and
                matchingresult[0] >= 0):
            print("Tracking successful, matching index is {}, matching dist is {}, skip face landmark and nn net forward".format(matchingresult[0], matchingresult[1]))
            print("prev_identity: {}".format(' '.join(str(e) for e in self.prev_identity)))
            #print("prev_rep: {}".format(' '.join(str(e) for e in self.prev_rep)))
            identity = self.prev_identity[matchingresult[0]]
            rep = self.prev_rep[matchingresult[0]]
            if identity == -1:
                # if len(self.people) == 1:
                #     name = self.people[0]
                # else:
                name = "Unknown"
            else:
                prob = [1.0]
                if self.svm:
                    prob = self.svm.predict_proba(rep)[0]
                name = self.people[identity] + ", " + str(round_to_1(prob[0] * 100)) + "%"
            print("[{}] is detected!".format(name))
            identitylist.append(identity)
            replist.append(rep)
            NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale,
                              (bb.right() - bb.left()) * inv_scale,
                              (bb.bottom() - bb.top()) * inv_scale))
            continue
        else:
            # Each frame does at most one net forward (hopefully the rest is
            # handled by tracking) to ensure speed can be maintained.
            if nn_processed_bbs >= 1:
                print("quota for net.forward() is consumed, treat this bb in next frame!")
                continue
            if not self.training and len(self.prev_bbs) > 0 and matchingresult[0] < 0:
                print("Tracking fail, do normal flow")
            alignedFace = align.align(args.imgDim, rgbFrame, bb,
                                      #landmarks=landmarks,
                                      landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
            if args.verbose:
                print("Find landmarks and alignment took {} seconds.".format(time.time() - start))
                start = time.time()
            if alignedFace is None:
                continue
            # the hash is used as the key for the map
            phash = str(imagehash.phash(Image.fromarray(alignedFace)))
            # if args.verbose:
            #     print("Image hash took {} seconds.".format(time.time() - start))
            #     start = time.time()
            # Determine identity by 1. getting the representation from a net
            # forward pass, 2. running an SVM on the representation.
            if phash in self.images:
                identity = self.images[phash].identity
                print("phash in self.image, identity is {}".format(identity))
            else:
                if self.training:
                    self.trainingnumber += 1
                    self.trainingPhashs.append(phash)
                    self.trainingAlignFaces.append(alignedFace)
                    self.trainingIdentity.append(identity)
                    # self.images[phash] = Face(rep, identity)
                    # # TODO: Transferring as a string is suboptimal.
                    # content = [str(x) for x in cv2.resize(alignedFace, (0,0),
                    #                                       fx=0.5, fy=0.5).flatten()]
                    # #content = [str(x) for x in alignedFace.flatten()]
                    # # if args.verbose:
                    # #     print("Flatten the alignedFace took {} seconds.".format(time.time() - start))
                    # #     start = time.time()
                    # msg = {
                    #     "type": "NEW_IMAGE",
                    #     "hash": phash,
                    #     "content": content,
                    #     "identity": identity,
                    #     "representation": rep.tolist()
                    # }
                    # self.sendMessage(json.dumps(msg))
                    # if args.verbose:
                    #     print("Send training json took {} seconds.".format(time.time() - start))
                    #     start = time.time()
                    # also send the bounding box to indicate the image learnt
                    name = "Learn: OK [" + str(self.trainingnumber) + "]"
                    print(name)
                    NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale,
                                      (bb.right() - bb.left()) * inv_scale,
                                      (bb.bottom() - bb.top()) * inv_scale))
                else:
                    rep = net.forward(alignedFace)
                    nn_processed_bbs += 1
                    if args.verbose:
                        print("Neural network forward pass took {} seconds.".format(time.time() - start))
                        start = time.time()
                    # Determine the identity of the rep
                    if len(self.people) == 0:
                        identity = -1  # unknown
                    elif len(self.people) >= 1:
                        if len(self.people) == 1:
                            identity = 0
                        elif self.svm:
                            # when more than one person is added, the identity is the index returned by the svm
                            identity = self.svm.predict(rep)[0]
                            # also need to double-confirm with the probability of each class
                            prob = self.svm.predict_proba(rep)[0]
                            print("prob of each class: {}".format(' '.join(str(e) for e in prob)))
                            if prob[0] < 0.8:
                                identity = -1
                                print("Top prob < 0.8, not so sure it is one of the trained people, treat as unknown")
                            # double-confirm with the class mean and std
                            if not self.mean:
                                self.getData()
                            if identity >= 0:
                                if self.mean and self.std:
                                    diff = np.absolute(self.mean[identity] - rep)
                                    dist_to_center = np.linalg.norm(diff)
                                    print("This bb rep distance to class centre is {}".format(dist_to_center))
                                    #print("This class std is : {}".format(self.std[identity]))
                                    # check if diff > 6*std in any dimension
                                    for idx, val in enumerate(self.std[identity]):
                                        print("idx: {}, Diff: {}, std: {}, ratio: {}".format(idx, diff[idx], val, diff[idx] / val))
                                        if diff[idx] > 6 * val:
                                            identity = -1
                                            print("Diff > 6*Std, not so sure it is one of the trained people, treat as unknown")
                                            break
                        else:
                            identity = -1
                    else:
                        print("hhh")
                        identity = -1
                    if identity not in identities:
                        identities.append(identity)
                    identitylist.append(identity)
                    replist.append(rep)
                    if not self.training:
                        start = time.time()
                        # Determine the name to display
                        if identity == -1:
                            # if len(self.people) == 1:
                            #     name = self.people[0]
                            # else:
                            name = "Unknown"
                        else:
                            name = self.people[identity] + ", " + str(round_to_1(prob[0] * 100)) + "%"
                        print("[{}] is detected!".format(name))
                        NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale,
                                          (bb.right() - bb.left()) * inv_scale,
                                          (bb.bottom() - bb.top()) * inv_scale))
    # end bbs for loop
    # save this frame's bbs and identity/rep info for tracking in the next frame
    if not self.training and len(bbs) > 0:
        self.prev_bbs = bbs
        self.prev_identity = identitylist
        self.prev_rep = replist
        #has_prev = True
    # finally, send identities and the annotated msg to the client
    if not self.training:
        start = time.time()
        # don't send the identities msg too often, since there is no need
        if self.counter % 10 == 0:
            msg = {"type": "IDENTITIES", "identities": identities}
            self.sendMessage(json.dumps(msg))
        # if args.verbose:
        #     print("Send back the IDENTITIES took {} seconds.".format(time.time() - start))
        #     start = time.time()
    if args.verbose:
        print("One frame took {} seconds. fps= {}".format(time.time() - framestart, 1 / (time.time() - framestart)))
    print("==================================================================")
    msg = {
        "type": "ANNOTATED",
        "content": NameAndBB,
        "fps": round_to_1(1 / (time.time() - framestart))
    }
    self.sendMessage(json.dumps(msg))
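The matching helper that drives the tracking shortcut above is not shown in this example. Based on how it is called (it returns an (index, distance) pair, marks matched boxes as used, and respects dist_thd), a plausible greedy nearest-centre sketch would be (a reconstruction, not the original implementation):

def matching(bb, prev_bbs, prev_bbs_isused, dist_thd):
    # Greedy nearest-centre matcher: returns (index, distance) of the closest
    # unused previous box when within dist_thd, otherwise (-1, best_dist).
    best_idx, best_dist = -1, float('inf')
    cx = (bb.left() + bb.right()) / 2.0
    cy = (bb.top() + bb.bottom()) / 2.0
    for i, prev in enumerate(prev_bbs):
        if prev_bbs_isused[i]:
            continue
        px = (prev.left() + prev.right()) / 2.0
        py = (prev.top() + prev.bottom()) / 2.0
        dist = ((cx - px) ** 2 + (cy - py) ** 2) ** 0.5
        if dist < best_dist:
            best_idx, best_dist = i, dist
    if best_idx >= 0 and best_dist <= dist_thd:
        prev_bbs_isused[best_idx] = 1
        return (best_idx, best_dist)
    return (-1, best_dist)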
def processFrame(self, dataURL, identity):
    framestart = time.time()
    start = time.time()
    NameAndBB = []
    # skip frames to achieve more smoothness
    #if self.counter % 2 == 0 or self.training:
    if True:
        head = "data:image/jpeg;base64,"
        assert dataURL.startswith(head)
        imgdata = base64.b64decode(dataURL[len(head):])
        if args.verbose:
            print("Decode the image took {} seconds.".format(time.time() - start))
            start = time.time()
        imgF = StringIO.StringIO()
        imgF.write(imgdata)
        imgF.seek(0)
        pil_image = Image.open(imgF)
        # img.save("zzz.jpg", "JPEG")
        # if args.verbose:
        #     print("img.save the image took {} seconds.".format(time.time() - start))
        #     start = time.time()
        self.rgbFrame = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
        ##buf = np.fliplr(np.asarray(img))
        # buf = np.asarray(img)
        if args.verbose:
            print("pil image to opencv mat took {} seconds.".format(time.time() - start))
            start = time.time()
        #self.rgbFrame = np.zeros((720, 1280, 3), dtype=np.uint8)
        #self.rgbFrame = np.zeros((450, 800, 3), dtype=np.uint8)
        #rgbFrame = np.zeros((360, 640, 3), dtype=np.uint8)
        #rgbFrame = np.zeros((216, 384, 3), dtype=np.uint8)  # frame length 44370, total 55fps, load 4ms, write 3ms, face 8.3ms
        #rgbFrame = np.zeros((234, 416, 3), dtype=np.uint8)  # frame length 51450, total 50fps, load 4.8ms, write 3.3ms, face 9.5ms
        #rgbFrame = np.zeros((252, 448, 3), dtype=np.uint8)  # frame length 55282, total 48fps, load 5.4ms, write 3.6ms, face 9.6ms
        #self.rgbFrame[:, :, 0] = buf[:, :, 2]
        #self.rgbFrame[:, :, 1] = buf[:, :, 1]
        #self.rgbFrame[:, :, 2] = buf[:, :, 0]
        scale_factor = 1
        inv_scale = 1.0 / scale_factor
        # rgbFrame = cv2.resize(rgbFrame_org, (0,0), fx=inv_scale, fy=inv_scale)
        # if args.verbose:
        #     print("resize the image took {} seconds.".format(time.time() - start))
        #     start = time.time()
        # rgbFrame_gray = cv2.cvtColor(rgbFrame, cv2.COLOR_BGR2GRAY)
        # if args.verbose:
        #     print("rgb to gray the image took {} seconds.".format(time.time() - start))
        #     start = time.time()
        #rgbFrame_gray = cv2.equalizeHist(rgbFrame_gray)
        #cv2.imwrite('zzz.png', rgbFrame_gray)
        # if not self.training:
        #     annotatedFrame_org = np.copy(buf)
        #     annotatedFrame = cv2.resize(annotatedFrame_org, (0,0), fx=0.5, fy=0.5)
        #     cv2.imshow('frame', rgbFrame)
        #     if cv2.waitKey(1) & 0xFF == ord('q'):
        #         return
        # if args.verbose:
        #     print("equalizeHist the image took {} seconds.".format(time.time() - start))
        #     start = time.time()
        identities = []
        bbs = []
        # bbs = align.getAllFaceBoundingBoxes(rgbFrame)
        #bb = align.getLargestFaceBoundingBox(rgbFrame_gray)
        # Try using OpenCV LBP face detection.
        # minNeighbors: parameter specifying how many neighbors each candidate rectangle should have to retain it.
        #faces = face_cascade.detectMultiScale(rgbFrame, 1.1, 2, cv2.CASCADE_SCALE_IMAGE, (20,20), (60,60))
        #print(len(faces))
        # convert faces to bbs
        #for (x, y, w, h) in faces:
        #    bbs.append(dlib.rectangle(x, y, x+w, y+h))
        cv2.imwrite('zzz.jpg', self.rgbFrame)
        #self.rgbFrame = cv2.imread('zzz.jpg')
        if args.verbose:
            print("imwrite the image took {} seconds.".format(time.time() - start))
            start = time.time()
        # if self.training and identity == -1:
        #     if not self.unknowntraining:
        #         print("Now is training unknown people...")
        #         self.unknowntraining = True
        #this_dir = os.path.dirname(os.path.realpath(__file__))
        #if self.zzzjpg_mutex.acquire():
        faces = cv2gpu.find_faces('/root/openface/zzz.jpg')
        #self.zzzjpg_mutex.release()
        #faces = cv2gpu.find_faces('http://172.18.9.99/axis-cgi/jpg/image.cgi')
        #print(len(faces))
        # If on person-specific training, only take the largest bounding box,
        # since the person to be learnt must stand in front.
        if self.training and len(faces) > 1:
            faces = max(faces, key=lambda rect: rect[2] * rect[3])
            faces = [faces]
        for (x, y, w, h) in faces:
            #print(x*scale_factor, y*scale_factor, w*scale_factor, h*scale_factor)
            bbs.append(dlib.rectangle(x * scale_factor, y * scale_factor, (x + w) * scale_factor, (y + h) * scale_factor))
        #bbs = [bb] if bb is not None else []
        if args.verbose:
            print("Face detection took {} seconds.".format(time.time() - start))
            start = time.time()
        # if len(bbs) > 1:
        #     print("Number of detected faces: ", len(bbs))
        identitylist = []
        replist = []
        scorelist = []
        BestMatchSimilarityScore = []
        prev_bbs_isused = [0] * (len(self.prev_bbs) + 1)
        #dist_thd = 20.0  # in pixels; larger values match more easily (saving time) but mismatch more easily too
        prob = [1.0]
        nn_processed_bbs = 0
        StopUpdatePrev = False
        name = None
        if len(bbs) == 0:
            print("No bbs is found in this frame!!")
        # Main loop over the detected bounding boxes
        for idxx, bb in enumerate(bbs):
            isNewlyDetect = False
            BestMatchSimilarityScore = 0
            bb_width = bb.right() - bb.left()
            print("BB:{} ->({}, {}, {}, {})".format(idxx + 1, bb.left() * scale_factor, bb.top() * scale_factor, bb_width * scale_factor, bb_width * scale_factor))
            if self.training:
                if bb_width < 50:
                    print("bb width < 50, active training only accepts big enough heads to be learnt")
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
                # use dlib to confirm again that the bb is valid
                start = time.time()
                dlib_margin = 10
                rgbFrame_roi = self.rgbFrame[max(bb.top() - dlib_margin, 0):min(720 - 1, bb.bottom() + dlib_margin), max(bb.left() - dlib_margin, 0):min(1280 - 1, bb.right() + dlib_margin)]
                rgbFrame_roi_resize = None
                # resize the ROI, otherwise dlib runs very slowly
                # (ranges ordered largest-first so every branch is reachable)
                if bb_width > 100:
                    resize_ratio = 0.6
                    if bb_width > 400:
                        resize_ratio = 0.2
                    elif bb_width > 200:
                        resize_ratio = 0.36
                    rgbFrame_roi_resize = cv2.resize(rgbFrame_roi, (0, 0), fx=resize_ratio, fy=resize_ratio)
                else:
                    rgbFrame_roi_resize = rgbFrame_roi.copy()
                dlib_bb = align.getLargestFaceBoundingBox(rgbFrame_roi_resize)
                if args.verbose:
                    print("dlib confirmation took {} seconds.".format(time.time() - start))
                    start = time.time()
                if not dlib_bb:
                    print("dlib confirmation fail!")
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
            # landmarks = align.findLandmarks(rgbFrame, bb)
            # if args.verbose:
            #     print("Find landmarks~ took {} seconds.".format(time.time() - start))
            #     start = time.time()
            # Do tracking first: see if we can match with the previous bbs.
            # For unknowns, don't track for more than a few frames, as the bb may be
            # wrongly classified into unknown, so give a chance to correct it.
            matchingresult = self.matching(bb, self.prev_bbs, self.prev_identity, prev_bbs_isused)
            if (not self.training and len(self.prev_bbs) > 0 and len(self.prev_identity) > 0 and len(self.prev_rep) > 0 and matchingresult[0] >= 0 and (self.prev_identity[matchingresult[0]] >= 0 or (self.prev_identity[matchingresult[0]] == -1 and self.counter % 5 != 0))):
                identity = self.prev_identity[matchingresult[0]]
                print("Tracking successful, matching index is {}, matching identity is {}, matching dist is {}, skip face landmark and nn net forward".format(matchingresult[0], identity, matchingresult[1]))
                #print("prev_identity: {}".format(' '.join(str(e) for e in self.prev_identity)))
                #print("prev_rep: {}".format(' '.join(str(e) for e in self.prev_rep)))
                BestMatchSimilarityScore = self.prev_score[matchingresult[0]]
                rep = self.prev_rep[matchingresult[0]]
                if identity == -1:
                    # Double-confirm whether it really matches no person in the database,
                    # because a real person may have been mistakenly recognized as unknown.
                    alignedFace = align.align(args.imgDim, self.rgbFrame, bb, landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
                    phash = str(imagehash.phash(Image.fromarray(alignedFace)))
                    if phash in self.images:
                        print("phash in self.image, skip training this")
                        identitylist.append(-99)  # -99 means it is not going to be tracked
                        replist.append(None)
                        scorelist.append(0.0)
                        continue
                    rep = net.forward(alignedFace)
                    print("Double check if this rep cannot match with any record in the database...")
                    (ide, sscore) = self.Determine_identity_by_rep(rep)
                    if ide == -1:
                        # add a new person automatically since it detects a new bb
                        self.unknowntraininglatestindex += 1
                        identity = 0
                        if self.identity_ofppl:
                            identity = max(self.identity_ofppl) + 1
                        newpersonname = "Unknown" + str(self.unknowntraininglatestindex)
                        self.people.append(newpersonname)
                        self.identity_ofppl.append(identity)
                        print("A new unknown person is detected -> {}, identity = {}".format(newpersonname, identity))
                        msg = {
                            "type": "NEW_PERSON",
                            "val": newpersonname,
                            "identity": identity
                        }
                        self.sendMessage(json.dumps(msg))
                        self.unknowntraining_list.append(identity)
                        if identity not in identities:
                            identities.append(identity)
                    else:
                        print("Fail! it is not a stable unknown, will not trigger automatic learning")
                        identitylist.append(-99)  # -99 means it is not going to be tracked
                        replist.append(None)
                        scorelist.append(0.0)
                        continue
                # if it is an already-learnt person
                if identity not in self.unknowntraining_list:
                    name = self.people[self.identity_ofppl.index(identity)]  #+ ", " + str(round_to_1(prob[0]*100)) + "%"
                    print("[{}] is detected! its identity is {}".format(name, identity))
                    # isNewlyDetect marks a bb that is tracked for the first time
                    if identity not in self.tracked_list_of_ppl:
                        isNewlyDetect = True
                        print("A newly detected face! update the result table")
                        self.tracked_list_of_ppl.append(identity)
                # if it is a new unknown that was given a new identity but has not yet finished training
                else:
                    # if enough photos are collected for the unknown, start to train the face
                    Num_of_img_unknown_need_to_train = 40
                    if identity in self.trainingnumber_foreachide and self.trainingnumber_foreachide[identity] >= Num_of_img_unknown_need_to_train:
                        self.trainFace()
                        self.unknowntraining_list.remove(identity)
                        name = "Finished!"
                        print("{} Learning finished!".format(self.people[self.identity_ofppl.index(identity)]))
                    else:
                        #if self.rgbFrame_mutex.acquire():
                        alignedFace = align.align(args.imgDim, self.rgbFrame, bb, landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
                        #self.rgbFrame_mutex.release()
                        phash = str(imagehash.phash(Image.fromarray(alignedFace)))
                        if phash in self.images:
                            print("phash in self.image, skip training this")
                            identitylist.append(-99)  # -99 means it is not going to be tracked
                            replist.append(None)
                            scorelist.append(0.0)
                            continue
                        self.trainingPhashs.append(phash)
                        self.trainingAlignFaces.append(alignedFace)
                        self.trainingIdentity.append(identity)
                        self.trainingRep.append(None)
                        self.trainingContent.append(None)
                        self.trainingnumber += 1
                        if identity in self.trainingnumber_foreachide:
                            self.trainingnumber_foreachide[identity] += 1
                        else:
                            self.trainingnumber_foreachide[identity] = 1
                        percentage = 100.0 * (self.trainingnumber_foreachide[identity] / (1.0 * Num_of_img_unknown_need_to_train))
                        name = "Learning [" + str(round_to_1(percentage)) + "%]"
                        print("{} -> {}, identity {}, in unknown person training mode".format(name, self.people[self.identity_ofppl.index(identity)], identity))
                identitylist.append(identity)
                replist.append(rep)
                scorelist.append(BestMatchSimilarityScore)
                NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale, bb_width * inv_scale, bb_width * inv_scale, identity, isNewlyDetect, BestMatchSimilarityScore))
                #continue
            # when tracking fails, or if it is in active learning mode
            else:
                # Each frame does at most one net forward (hopefully the rest is
                # handled by tracking) to ensure speed can be maintained.
                if nn_processed_bbs >= 1:
                    print("quota for net.forward() is consumed, treat this bb in next frame!")
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
                if not self.training and len(self.prev_bbs) > 0 and matchingresult[0] < 0:
                    print("Tracking fail, tracking dist is {}, do normal flow".format(matchingresult[1]))
                #if self.rgbFrame_mutex.acquire():
                alignedFace = align.align(args.imgDim, self.rgbFrame, bb, landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
                #self.rgbFrame_mutex.release()
                if args.verbose:
                    print("Find landmarks and alignment took {} seconds.".format(time.time() - start))
                    start = time.time()
                if alignedFace is None:
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
                # the hash is used as the key for the map
                phash = str(imagehash.phash(Image.fromarray(alignedFace)))
                # if args.verbose:
                #     print("Image hash took {} seconds.".format(time.time() - start))
                #     start = time.time()
                # Determine identity by 1. getting the representation from a net
                # forward pass, 2. running an SVM on the representation.
                if phash in self.images and self.training:
                    #identity = self.images[phash].identity
                    print("phash in self.image, skip training this")
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
                # active training mode
                if self.training:
                    self.trainingPhashs.append(phash)
                    self.trainingAlignFaces.append(alignedFace)
                    self.trainingIdentity.append(identity)
                    self.trainingRep.append(None)
                    self.trainingContent.append(None)
                    self.trainingnumber += 1
                    if identity in self.trainingnumber_foreachide:
                        self.trainingnumber_foreachide[identity] += 1
                    else:
                        self.trainingnumber_foreachide[identity] = 1
                    name = "Learning [" + str(self.trainingnumber_foreachide[identity]) + "Img]"
                    print("{} -> {}, identity {}, in active person training mode".format(name, self.people[self.identity_ofppl.index(identity)], identity))
                    #print(name)
                    NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale, bb_width * inv_scale, bb_width * inv_scale, identity, isNewlyDetect, BestMatchSimilarityScore))
                # if not in active training and tracking failed
                else:
                    rep = net.forward(alignedFace)
                    #isNewlyDetect = True
                    #print("A newly detected face!")
                    nn_processed_bbs += 1
                    if args.verbose:
                        print("Neural network forward pass took {} seconds.".format(time.time() - start))
                        start = time.time()
                    # Tracking failed, so determine the identity of the bb
                    (identity, BestMatchSimilarityScore) = self.Determine_identity_by_rep(rep)
                    if identity not in identities:
                        identities.append(identity)
                    if identity in self.tracked_list_of_ppl:
                        self.tracked_list_of_ppl.remove(identity)
                    identitylist.append(identity)
                    replist.append(rep)
                    scorelist.append(BestMatchSimilarityScore)
                    # Determine the name to display
                    if identity == -1:
                        name = "Unknown"
                    else:
                        name = self.people[self.identity_ofppl.index(identity)]  #+ ", " + str(round_to_1(prob[0]*100)) + "%"
                    print("[{}] is detected! its identity is {}".format(name, identity))
                    # NameAndBB.append((name, bb.left()*inv_scale, bb.top()*inv_scale, (bb.right()-bb.left())*inv_scale,
                    #                   (bb.bottom()-bb.top())*inv_scale, identity, isNewlyDetect, BestMatchSimilarityScore))
        # end bbs for loop
        #if self.stop_all_tracking:
        #    self.stop_all_tracking = False
        # save this frame's bbs and identity/rep info for tracking in the next frame
        if not self.training and len(bbs) > 0 and not StopUpdatePrev:
            # Make tracking easier in unknowntraining mode, even if one frame misses the target bb
            for idx, val in enumerate(self.prev_identity):
                if val not in identitylist and val > -1 and val in self.unknowntraining_list:
                    if val not in self.rescuetimes or (val in self.rescuetimes and self.rescuetimes[val] < 30):
                        bbs.append(self.prev_bbs[idx])
                        identitylist.append(self.prev_identity[idx])
                        replist.append(self.prev_rep[idx])
                        scorelist.append(self.prev_score[idx])
                        if val not in self.rescuetimes:
                            self.rescuetimes[val] = 1
                        else:
                            self.rescuetimes[val] += 1
                        print("in unknowntraining mode, rescue identity {} for the {} time".format(val, self.rescuetimes[val]))
                    else:
                        print("in unknowntraining mode, cannot rescue identity {} anymore".format(val))
                elif val in identitylist and val > -1:
                    self.rescuetimes[val] = 0
            self.prev_bbs = bbs
            self.prev_identity = identitylist
            self.prev_rep = replist
            self.prev_score = scorelist
            #has_prev = True
        # finally, send identities and the annotated msg to the client
        if not self.training:
            start = time.time()
            # don't send the identities msg too often, since there is no need
            if self.counter % 10 == 0:
                msg = {"type": "IDENTITIES", "identities": identities}
                self.sendMessage(json.dumps(msg))
            # if args.verbose:
            #     print("Send back the IDENTITIES took {} seconds.".format(time.time() - start))
            #     start = time.time()
        self.lastframetime = time.time() - framestart
        if args.verbose:
            print("One frame took {} seconds. fps= {}".format(self.lastframetime, 1 / self.lastframetime))
        #else:
        #    print("Skip frame")
        print("==================================================================")
        msg = {
            "type": "ANNOTATED",
            "content": NameAndBB,
            "fps": round_to_1(1 / self.lastframetime)
        }
        self.sendMessage(json.dumps(msg))
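round_to_1 is used throughout these examples to format scores and fps values but is never defined. Judging by the name, it plausibly rounds to one significant figure; a common sketch of such a helper:

import math

def round_to_1(x):
    # Round to one significant figure; pass 0 through to avoid log10(0).
    if x == 0:
        return 0
    return round(x, -int(math.floor(math.log10(abs(x)))))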
def detect(self, image_path):
    return cv2gpu.find_faces(image_path)
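A thin wrapper like this still requires the detector to be initialized first. A hypothetical wrapper class and usage (the class name, cascade path, and image path are illustrative, not from the original project):

import cv2gpu

class GpuFaceDetector(object):
    def __init__(self, cascade_path):
        # The detector must be initialized before find_faces is usable.
        cv2gpu.init_gpu_detector(cascade_path)

    def detect(self, image_path):
        return cv2gpu.find_faces(image_path)

detector = GpuFaceDetector('haarcascade_frontalface_default_cuda.xml')
for (x, y, w, h) in detector.detect('group_photo.jpg'):
    print(x, y, w, h)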
descriptor = None
while True:
    # capture frame
    frame = video.read()
    # detect faces
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if args.detector == 'opencv':
        rects = detector.detectMultiScale(
            gray,
            scaleFactor=args.opencv_scalefactor,
            minNeighbors=args.opencv_minneighbors,
            minSize=(args.opencv_minsize, args.opencv_minsize),
            flags=cv2.cv.CV_HAAR_SCALE_IMAGE)
    elif args.detector == 'cv2gpu':
        rects = cv2gpu.find_faces(gray)
    else:
        rects = detector(gray, args.upscale)
    # skip the following if no faces were found
    if len(rects) == 0:
        canvas[frame_top:frame_top + frame_height, frame_left:frame_left + frame_width, :] = frame
        descriptor = None
        canvas[face_top:frame_top].fill(0)
    else:
        # draw a rectangle around the face
        frame2 = frame.copy()
        frame = frame[..., ::-1]  # BGR to RGB
        rect = rects[0]
        if args.detector == 'cnn':
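Note that the cv2gpu branch above passes the gray frame as an array, whereas every other example in this section passes a file path to find_faces. If the build in use only accepts paths, a temp-file bridge along these lines would be needed (a sketch; the temp path is illustrative):

import cv2
import cv2gpu

def find_faces_in_array(gray_frame, tmp_path='/tmp/cv2gpu_frame.jpg'):
    # Persist the frame so the path-based find_faces can read it; this trades
    # a disk write per frame for GPU-side detection speed.
    cv2.imwrite(tmp_path, gray_frame)
    return cv2gpu.find_faces(tmp_path)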
def processFrame(self, dataURL, identity):
    framestart = time.time()
    start = time.time()
    head = "data:image/jpeg;base64,"
    assert dataURL.startswith(head)
    imgdata = base64.b64decode(dataURL[len(head):])
    if args.verbose:
        print("Decode the image took {} seconds.".format(time.time() - start))
        start = time.time()
    imgF = StringIO.StringIO()
    imgF.write(imgdata)
    imgF.seek(0)
    img = Image.open(imgF)
    #buf = np.fliplr(np.asarray(img))
    buf = np.asarray(img)
    #rgbFrame_org = np.zeros((720, 1280, 3), dtype=np.uint8)
    #rgbFrame_org = np.zeros((450, 800, 3), dtype=np.uint8)
    #rgbFrame = np.zeros((360, 640, 3), dtype=np.uint8)
    rgbFrame = np.zeros((216, 384, 3), dtype=np.uint8)
    rgbFrame[:, :, 0] = buf[:, :, 2]
    rgbFrame[:, :, 1] = buf[:, :, 1]
    rgbFrame[:, :, 2] = buf[:, :, 0]
    if args.verbose:
        print("load the image took {} seconds.".format(time.time() - start))
        start = time.time()
    scale_factor = 1
    inv_scale = 1.0 / scale_factor
    # rgbFrame = cv2.resize(rgbFrame_org, (0,0), fx=inv_scale, fy=inv_scale)
    # if args.verbose:
    #     print("resize the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    # rgbFrame_gray = cv2.cvtColor(rgbFrame, cv2.COLOR_BGR2GRAY)
    # if args.verbose:
    #     print("rgb to gray the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    #rgbFrame_gray = cv2.equalizeHist(rgbFrame_gray)
    #cv2.imwrite('zzz.png', rgbFrame_gray)
    # if not self.training:
    #     annotatedFrame_org = np.copy(buf)
    #     annotatedFrame = cv2.resize(annotatedFrame_org, (0,0), fx=0.5, fy=0.5)
    #     cv2.imshow('frame', rgbFrame)
    #     if cv2.waitKey(1) & 0xFF == ord('q'):
    #         return
    # if args.verbose:
    #     print("equalizeHist the image took {} seconds.".format(time.time() - start))
    #     start = time.time()
    identities = []
    NameAndBB = []
    bbs = []
    # bbs = align.getAllFaceBoundingBoxes(rgbFrame)
    #bb = align.getLargestFaceBoundingBox(rgbFrame_gray)
    # Try using OpenCV LBP face detection.
    # minNeighbors: parameter specifying how many neighbors each candidate rectangle should have to retain it.
    #faces = face_cascade.detectMultiScale(rgbFrame, 1.1, 2, cv2.CASCADE_SCALE_IMAGE, (20,20), (60,60))
    #print(len(faces))
    # convert faces to bbs
    #for (x, y, w, h) in faces:
    #    bbs.append(dlib.rectangle(x, y, x+w, y+h))
    cv2.imwrite('zzz.jpg', rgbFrame)
    if args.verbose:
        print("imwrite the image took {} seconds.".format(time.time() - start))
        start = time.time()
    #this_dir = os.path.dirname(os.path.realpath(__file__))
    faces = cv2gpu.find_faces('/root/openface/zzz.jpg')
    #print(len(faces))
    for (x, y, w, h) in faces:
        print(x * scale_factor, y * scale_factor, w * scale_factor, h * scale_factor)
        bbs.append(dlib.rectangle(x * scale_factor, y * scale_factor, (x + w) * scale_factor, (y + h) * scale_factor))
    #bbs = [bb] if bb is not None else []
    if args.verbose:
        print("Face detection took {} seconds.".format(time.time() - start))
        start = time.time()
    # If training, only take the largest bounding box, since the person to be
    # learnt must stand in front.
    if self.training and len(faces) > 1:
        faces = max(faces, key=lambda rect: rect[2] * rect[3])
    print("Number of detected faces: ", len(bbs))
    identitylist = []
    prev_bbs_isused = [0] * (len(self.prev_bbs) + 1)
    dist_thd = 20.0  # in pixels; larger values match more easily (saving time) but mismatch more easily too
    for bb in bbs:
        # landmarks = align.findLandmarks(rgbFrame, bb)
        # if args.verbose:
        #     print("Find landmarks~ took {} seconds.".format(time.time() - start))
        #     start = time.time()
        # Do tracking first: see if we can match with the previous frame's bbs.
        matchingresult = matching(bb, self.prev_bbs, prev_bbs_isused, dist_thd)
        if not self.training and len(self.prev_bbs) > 0 and len(self.prev_identity) > 0 and matchingresult[0] >= 0:
            print("Tracking successful, matching index is {}, matching dist is {}, skip face landmark and nn net forward".format(matchingresult[0], matchingresult[1]))
            print("prev_identity: {}".format(''.join(str(e) for e in self.prev_identity)))
            identity = self.prev_identity[matchingresult[0]]
            if identity == -1:
                if len(self.people) == 1:
                    name = self.people[0]
                else:
                    name = "Unknown"
            else:
                name = self.people[identity]
            identitylist.append(identity)
            NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale, (bb.right() - bb.left()) * inv_scale, (bb.bottom() - bb.top()) * inv_scale))
            continue
        else:
            if len(self.prev_bbs) > 0 and matchingresult[0] < 0:
                print("Tracking fail, do normal flow")
            alignedFace = align.align(args.imgDim, rgbFrame, bb,
                                      #landmarks=landmarks,
                                      landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
            if args.verbose:
                print("Find landmarks and alignment took {} seconds.".format(time.time() - start))
                start = time.time()
            if alignedFace is None:
                continue
            phash = str(imagehash.phash(Image.fromarray(alignedFace)))
            if args.verbose:
                print("Image hash took {} seconds.".format(time.time() - start))
                start = time.time()
            # Determine identity by 1. getting the representation from a net
            # forward pass, 2. running an SVM on the representation.
            if phash in self.images:
                identity = self.images[phash].identity
                print("phash in self.image, identity is {}".format(identity))
            else:
                rep = net.forward(alignedFace)
                if args.verbose:
                    print("Neural network forward pass took {} seconds.".format(time.time() - start))
                    start = time.time()
                # print(rep)
                if self.training:
                    self.images[phash] = Face(rep, identity)
                    # TODO: Transferring as a string is suboptimal.
                    content = [str(x) for x in cv2.resize(alignedFace, (0, 0), fx=0.5, fy=0.5).flatten()]
                    #content = [str(x) for x in alignedFace.flatten()]
                    if args.verbose:
                        print("Flatten the alignedFace took {} seconds.".format(time.time() - start))
                        start = time.time()
                    msg = {
                        "type": "NEW_IMAGE",
                        "hash": phash,
                        "content": content,
                        "identity": identity,
                        "representation": rep.tolist()
                    }
                    self.sendMessage(json.dumps(msg))
                    if args.verbose:
                        print("Send training json took {} seconds.".format(time.time() - start))
                        start = time.time()
                    # also send the bounding box to indicate the image learnt
                    name = "Learn: OK"
                    NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale, (bb.right() - bb.left()) * inv_scale, (bb.bottom() - bb.top()) * inv_scale))
                else:
                    if len(self.people) == 0:
                        identity = -1
                    elif len(self.people) == 1:
                        identity = 0
                    elif self.svm:
                        # when more than one person is added, the identity is the index returned by the svm
                        identity = self.svm.predict(rep)[0]
                    else:
                        print("hhh")
                        identity = -1
                    if identity not in identities:
                        identities.append(identity)
            identitylist.append(identity)
            if not self.training:
                start = time.time()
                # bl = (bb.left(), bb.bottom())
                # tr = (bb.right(), bb.top())
                # cv2.rectangle(annotatedFrame, bl, tr, color=(153, 255, 204), thickness=3)
                # for p in openface.AlignDlib.OUTER_EYES_AND_NOSE:
                #     cv2.circle(annotatedFrame, center=landmarks[p], radius=3,
                #                color=(102, 204, 255), thickness=-1)
                # Determine the name to display
                if identity == -1:
                    if len(self.people) == 1:
                        name = self.people[0]
                    else:
                        name = "Unknown"
                else:
                    name = self.people[identity]
                # cv2.putText(annotatedFrame, name, (bb.left(), bb.top() - 10),
                #             cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.75,
                #             color=(152, 255, 204), thickness=2)
                # if args.verbose:
                #     print("Drawing took {} seconds.".format(time.time() - start))
                #     start = time.time()
                # (name, x, y, width, height)
                NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale, (bb.right() - bb.left()) * inv_scale, (bb.bottom() - bb.top()) * inv_scale))
    # end bbs for loop
    if len(bbs) > 0:
        self.prev_bbs = bbs
        self.prev_identity = identitylist
        #has_prev = True
    # finally, send identities and the annotated msg to the client
    if not self.training:
        start = time.time()
        msg = {"type": "IDENTITIES", "identities": identities}
        self.sendMessage(json.dumps(msg))
        if args.verbose:
            print("Send back the IDENTITIES took {} seconds.".format(time.time() - start))
            start = time.time()
        # plt.figure()
        # plt.imshow(annotatedFrame)
        # if args.verbose:
        #     print("plt.imshow() took {} seconds.".format(time.time() - start))
        #     start = time.time()
        # plt.xticks([])
        # plt.yticks([])
        # imgdata = StringIO.StringIO()
        # plt.savefig(imgdata, format='png')
        # if args.verbose:
        #     print("plt.savefig() took {} seconds.".format(time.time() - start))
        #     start = time.time()
        # # cv2.imwrite(imgdata, annotatedFrame)
        # # if args.verbose:
        # #     print("cv2.imwrite() took {} seconds.".format(time.time() - start))
        # #     start = time.time()
        # imgdata.seek(0)
        # content = 'data:image/png;base64,' + \
        #     urllib.quote(base64.b64encode(imgdata.buf))
        # plt.close()
        # if args.verbose:
        #     print("Send back the ANNOTATED info took {} seconds.".format(time.time() - start))
        #     start = time.time()
    if args.verbose:
        print("One frame took {} seconds. fps= {}".format(time.time() - framestart, 1 / (time.time() - framestart)))
    print("==================================================================")
    msg = {
        "type": "ANNOTATED",
        "content": NameAndBB,
        "fps": round_to_1(1 / (time.time() - framestart))
    }
    self.sendMessage(json.dumps(msg))
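The Face container stored in self.images is not shown in this example. Given how it is written (Face(rep, identity)) and read back (self.images[phash].identity), a minimal sketch would be:

class Face(object):
    # Pairs a face embedding (rep) with the identity label it was learnt under.
    def __init__(self, rep, identity):
        self.rep = rep
        self.identity = identity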
def prune_func_gpu(image_path):
    if getsize(image_path) < size_file_max:
        faces = cv2gpu.find_faces(image_path)
        if faces:
            return len(faces) > 0
    return False
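This predicate assumes getsize (presumably os.path.getsize) and a module-level size_file_max. A hypothetical driver that keeps only images small enough to process and containing at least one face (the constant and directory handling are illustrative):

import os
from os.path import getsize

size_file_max = 5 * 1024 * 1024  # assumption: skip files larger than 5 MB

def prune_directory(image_dir):
    # Collect the image paths that pass the GPU face check.
    return [os.path.join(image_dir, name)
            for name in os.listdir(image_dir)
            if prune_func_gpu(os.path.join(image_dir, name))]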
def processFrame(self, dataURL, identity):
    framestart = time.time()
    start = time.time()
    NameAndBB = []
    # skip frames to achieve more smoothness
    #if self.counter % 2 == 0 or self.training:
    if True:
        head = "data:image/jpeg;base64,"
        assert dataURL.startswith(head)
        imgdata = base64.b64decode(dataURL[len(head):])
        if args.verbose:
            print("Decode the image took {} seconds.".format(time.time() - start))
            start = time.time()
        imgF = StringIO.StringIO()
        imgF.write(imgdata)
        imgF.seek(0)
        img = Image.open(imgF)
        ##buf = np.fliplr(np.asarray(img))
        buf = np.asarray(img)
        #rgbFrame_org = np.zeros((720, 1280, 3), dtype=np.uint8)
        self.rgbFrame = np.zeros((450, 800, 3), dtype=np.uint8)
        #rgbFrame = np.zeros((360, 640, 3), dtype=np.uint8)
        #rgbFrame = np.zeros((216, 384, 3), dtype=np.uint8)  # frame length 44370, total 55fps, load 4ms, write 3ms, face 8.3ms
        #rgbFrame = np.zeros((234, 416, 3), dtype=np.uint8)  # frame length 51450, total 50fps, load 4.8ms, write 3.3ms, face 9.5ms
        #rgbFrame = np.zeros((252, 448, 3), dtype=np.uint8)  # frame length 55282, total 48fps, load 5.4ms, write 3.6ms, face 9.6ms
        self.rgbFrame[:, :, 0] = buf[:, :, 2]
        self.rgbFrame[:, :, 1] = buf[:, :, 1]
        self.rgbFrame[:, :, 2] = buf[:, :, 0]
        if args.verbose:
            print("load the image took {} seconds.".format(time.time() - start))
            start = time.time()
        scale_factor = 1
        inv_scale = 1.0 / scale_factor
        # rgbFrame = cv2.resize(rgbFrame_org, (0,0), fx=inv_scale, fy=inv_scale)
        # if args.verbose:
        #     print("resize the image took {} seconds.".format(time.time() - start))
        #     start = time.time()
        # rgbFrame_gray = cv2.cvtColor(rgbFrame, cv2.COLOR_BGR2GRAY)
        # if args.verbose:
        #     print("rgb to gray the image took {} seconds.".format(time.time() - start))
        #     start = time.time()
        #rgbFrame_gray = cv2.equalizeHist(rgbFrame_gray)
        #cv2.imwrite('zzz.png', rgbFrame_gray)
        # if not self.training:
        #     annotatedFrame_org = np.copy(buf)
        #     annotatedFrame = cv2.resize(annotatedFrame_org, (0,0), fx=0.5, fy=0.5)
        #     cv2.imshow('frame', rgbFrame)
        #     if cv2.waitKey(1) & 0xFF == ord('q'):
        #         return
        # if args.verbose:
        #     print("equalizeHist the image took {} seconds.".format(time.time() - start))
        #     start = time.time()
        identities = []
        bbs = []
        # bbs = align.getAllFaceBoundingBoxes(rgbFrame)
        #bb = align.getLargestFaceBoundingBox(rgbFrame_gray)
        # Try using OpenCV LBP face detection.
        # minNeighbors: parameter specifying how many neighbors each candidate rectangle should have to retain it.
        #faces = face_cascade.detectMultiScale(rgbFrame, 1.1, 2, cv2.CASCADE_SCALE_IMAGE, (20,20), (60,60))
        #print(len(faces))
        # convert faces to bbs
        #for (x, y, w, h) in faces:
        #    bbs.append(dlib.rectangle(x, y, x+w, y+h))
        cv2.imwrite('zzz.jpg', self.rgbFrame)
        if args.verbose:
            print("imwrite the image took {} seconds.".format(time.time() - start))
            start = time.time()
        if self.training and identity == -1:
            if not self.unknowntraining:
                print("Now is training unknown people...")
                self.unknowntraining = True
        #this_dir = os.path.dirname(os.path.realpath(__file__))
        #if self.zzzjpg_mutex.acquire():
        faces = cv2gpu.find_faces('/root/openface/zzz.jpg')
        #self.zzzjpg_mutex.release()
        #faces = cv2gpu.find_faces('http://172.18.9.99/axis-cgi/jpg/image.cgi')
        #print(len(faces))
        # If on person-specific training, only take the largest bounding box,
        # since the person to be learnt must stand in front.
        if self.training and len(faces) > 1 and not self.unknowntraining:
            faces = max(faces, key=lambda rect: rect[2] * rect[3])
            faces = [faces]
        for (x, y, w, h) in faces:
            #print(x*scale_factor, y*scale_factor, w*scale_factor, h*scale_factor)
            bbs.append(dlib.rectangle(x * scale_factor, y * scale_factor, (x + w) * scale_factor, (y + h) * scale_factor))
        #bbs = [bb] if bb is not None else []
        if args.verbose:
            print("Face detection took {} seconds.".format(time.time() - start))
            start = time.time()
        # if len(bbs) > 1:
        #     print("Number of detected faces: ", len(bbs))
        identitylist = []
        replist = []
        scorelist = []
        BestMatchSimilarityScore = []
        prev_bbs_isused = [0] * (len(self.prev_bbs) + 1)
        #dist_thd = 20.0  # in pixels; larger values match more easily (saving time) but mismatch more easily too
        prob = [1.0]
        nn_processed_bbs = 0
        StopUpdatePrev = False
        if len(bbs) == 0:
            print("No bbs is found in this frame!!")
        for idxx, bb in enumerate(bbs):
            isNewlyDetect = False
            BestMatchSimilarityScore = 0
            print("BB:{} ->({}, {}, {}, {})".format(idxx + 1, bb.left() * scale_factor, bb.top() * scale_factor, (bb.right() - bb.left()) * scale_factor, (bb.bottom() - bb.top()) * scale_factor))
            if self.training:
                if (bb.right() - bb.left()) < 50:
                    print("bb width < 50, training only accepts big enough heads to be learnt")
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
                # use dlib to confirm again
                rgbFrame_roi = self.rgbFrame[max(bb.top() - 10, 0):min(450 - 1, bb.bottom() + 10), max(bb.left() - 10, 0):min(800 - 1, bb.right() + 10)]
                if not align.getLargestFaceBoundingBox(rgbFrame_roi):
                    print("dlib confirmation fail!")
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
            # landmarks = align.findLandmarks(rgbFrame, bb)
            # if args.verbose:
            #     print("Find landmarks~ took {} seconds.".format(time.time() - start))
            #     start = time.time()
            # Do tracking first: see if we can match with the previous bbs.
            # For unknowns, don't track for more than 4 frames, as the bb may be wrongly
            # classified into unknown, so give a chance to correct it.
            matchingresult = matching(bb, self.prev_bbs, self.prev_identity, prev_bbs_isused, self.unknowntraining)
            if ((not self.training or self.unknowntraining) and len(self.prev_bbs) > 0 and len(self.prev_identity) > 0 and len(self.prev_rep) > 0 and matchingresult[0] >= 0 and (self.prev_identity[matchingresult[0]] >= 0 or (self.prev_identity[matchingresult[0]] == -1 and self.counter % 5 != 0))):
                identity = self.prev_identity[matchingresult[0]]
                print("Tracking successful, matching index is {}, matching identity is {}, matching dist is {}, skip face landmark and nn net forward".format(matchingresult[0], identity, matchingresult[1]))
                #print("prev_identity: {}".format(' '.join(str(e) for e in self.prev_identity)))
                #print("prev_rep: {}".format(' '.join(str(e) for e in self.prev_rep)))
                BestMatchSimilarityScore = self.prev_score[matchingresult[0]]
                rep = self.prev_rep[matchingresult[0]]
                if not self.unknowntraining:
                    if identity == -1:
                        # if len(self.people) == 1:
                        #     name = self.people[0]
                        # else:
                        name = "Unknown"
                    else:
                        # prob = [1.0]
                        # if self.svm:
                        #     prob = self.svm.predict_proba(rep)[identity]
                        name = self.people[self.identity_ofppl.index(identity)]  #+ ", " + str(round_to_1(prob[0]*100)) + "%"
                        print("[{}] is detected! its identity is {}".format(name, identity))
                        # isNewlyDetect marks a bb that is tracked for the first time
                        if identity not in self.tracked_list_of_ppl:
                            isNewlyDetect = True
                            print("A newly detected face! update the result table")
                            self.tracked_list_of_ppl.append(identity)
                else:
                    if identity == -1:
                        # add a new person automatically since it detects a new bb
                        self.unknowntraininglatestindex += 1
                        identity = 0
                        if self.identity_ofppl:
                            identity = max(self.identity_ofppl) + 1
                        newpersonname = "Unknown" + str(self.unknowntraininglatestindex)
                        self.people.append(newpersonname)
                        self.identity_ofppl.append(identity)
                        print("A new person is detected in unknown training mode -> {}, identity = {}".format(newpersonname, identity))
                        msg = {
                            "type": "NEW_PERSON",
                            "val": newpersonname,
                            "identity": identity
                        }
                        self.sendMessage(json.dumps(msg))
                        self.unknowntraining_list.append(identity)
                        if identity not in identities:
                            identities.append(identity)
                    #if self.rgbFrame_mutex.acquire():
                    alignedFace = align.align(args.imgDim, self.rgbFrame, bb, landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
                    #self.rgbFrame_mutex.release()
                    phash = str(imagehash.phash(Image.fromarray(alignedFace)))
                    self.trainingnumber += 1
                    self.trainingPhashs.append(phash)
                    self.trainingAlignFaces.append(alignedFace)
                    self.trainingIdentity.append(identity)
                    if identity in self.trainingnumber_foreachide:
                        self.trainingnumber_foreachide[identity] += 1
                    else:
                        self.trainingnumber_foreachide[identity] = 1
                    name = "Learn: OK [" + str(self.trainingnumber_foreachide[identity]) + "]"
                    print("{} -> {}, identity {}, in unknown person training mode".format(name, self.people[self.identity_ofppl.index(identity)], identity))
                identitylist.append(identity)
                replist.append(rep)
                scorelist.append(BestMatchSimilarityScore)
                NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale, (bb.right() - bb.left()) * inv_scale, (bb.bottom() - bb.top()) * inv_scale, identity, isNewlyDetect, BestMatchSimilarityScore))
                #continue
            else:
                # Each frame does at most one net forward (hopefully the rest is
                # handled by tracking) to ensure speed can be maintained.
                if nn_processed_bbs >= 1 and not self.unknowntraining:
                    print("quota for net.forward() is consumed, treat this bb in next frame!")
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
                if (not self.training or self.training and self.unknowntraining) and len(self.prev_bbs) > 0 and matchingresult[0] < 0:
                    print("Tracking fail, tracking dist is {}, do normal flow".format(matchingresult[1]))
                #if self.rgbFrame_mutex.acquire():
                alignedFace = align.align(args.imgDim, self.rgbFrame, bb, landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
                #self.rgbFrame_mutex.release()
                if args.verbose:
                    print("Find landmarks and alignment took {} seconds.".format(time.time() - start))
                    start = time.time()
                if alignedFace is None:
                    identitylist.append(-99)  # -99 means it is not going to be tracked
                    replist.append(None)
                    scorelist.append(0.0)
                    continue
                # the hash is used as the key for the map
                phash = str(imagehash.phash(Image.fromarray(alignedFace)))
                # if args.verbose:
                #     print("Image hash took {} seconds.".format(time.time() - start))
                #     start = time.time()
                # Determine identity by 1. getting the representation from a net
                # forward pass, 2. running an SVM on the representation.
                #if phash in self.images:
                #    identity = self.images[phash].identity
                #    print("phash in self.image, identity is {}".format(identity))
                #else:
                if self.training:
                    if self.unknowntraining:
                        # self.unknowntraininglatestindex += 1
                        # identity += 1
                        # newpersonname = "Unknown" + str(self.unknowntraininglatestindex)
                        # self.people.append(newpersonname)
                        # print("A new person is detected in unknown training mode -> {}".format(newpersonname))
                        # msg = {
                        #     "type": "NEW_PERSON",
                        #     "val": newpersonname,
                        #     "identity": identity
                        # }
                        # self.sendMessage(json.dumps(msg))
                        # self.unknowntraining_list.append(identity)
                        # if identity not in identities:
                        #     identities.append(identity)
                        identitylist.append(-1)
                        replist.append(None)
                        scorelist.append(0.0)
                        # In unknowntraining, don't actually train this time, since it may be garbage;
                        # train in the tracking part to ensure it is more likely a real person.
                    else:
                        self.trainingnumber += 1
                        self.trainingPhashs.append(phash)
                        self.trainingAlignFaces.append(alignedFace)
                        self.trainingIdentity.append(identity)
                        if identity in self.trainingnumber_foreachide:
                            self.trainingnumber_foreachide[identity] += 1
                        else:
                            self.trainingnumber_foreachide[identity] = 1
                        name = "Learn: OK [" + str(self.trainingnumber_foreachide[identity]) + "]"
                        print("{} -> {}, identity {}, in known person training mode".format(name, self.people[self.identity_ofppl.index(identity)], identity))
                        #print(name)
                        NameAndBB.append((name, bb.left() * inv_scale, bb.top() * inv_scale, (bb.right() - bb.left()) * inv_scale, (bb.bottom() - bb.top()) * inv_scale, identity, isNewlyDetect, BestMatchSimilarityScore))
                else:
                    rep = net.forward(alignedFace)
                    #isNewlyDetect = True
                    #print("A newly detected face!")
                    nn_processed_bbs += 1
                    if args.verbose:
                        print("Neural network forward pass took {} seconds.".format(time.time() - start))
                        start = time.time()
                    # Determine the identity of the rep
                    if len(self.people) == 0:
                        identity = -1  # unknown
                    elif len(self.people) >= 1:
                        if len(self.people) == 1:
                            #identity = 0
                            identity = self.identity_ofppl[0]
                        elif self.svm:
                            # when more than one person is added, the identity is the index returned by the svm
                            identity = self.svm.predict(rep)[0]
                            # also need to double-confirm with the probability of each class
                            prob = self.svm.predict_proba(rep)[0]
                            print("prob of each class: {}".format(' '.join(str(e) for e in prob)))
                            if max(prob) < 0.8:
                                identity = -1
                                print("Top prob < 0.8, not so sure it is one of the trained people, treat as unknown")
                            # double-confirm with the class mean and std
                            if not self.mean:
                                self.getData()
                            if identity >= 0:
                                if self.mean and self.std:
                                    diff = np.absolute(self.mean[identity] - rep)
                                    dist_to_center = np.linalg.norm(diff)
                                    print("This bb rep distance to class centre is {}".format(dist_to_center))
                                    #print("This class std is : {}".format(self.std[identity]))
                                    # Best match: score 1; poorest match: score 0
                                    BestMatchSimilarityScore = round_to_1(math.exp(-1 * dist_to_center))
                                    print("BestMatchSimilarityScore is {}".format(BestMatchSimilarityScore))
                                    # check if diff > 6*std in any dimension
                                    largest_ratio = 0
                                    for idx, val in enumerate(self.std[identity]):
                                        print("idx: {}, Diff: {}, std: {}, ratio: {}".format(idx, diff[idx], val, diff[idx] / val))
                                        ratio = diff[idx] / val
                                        if ratio > largest_ratio:
                                            largest_ratio = ratio
                                        if ratio > 5:
                                            identity = -1
                                            print("Diff > 6*Std, not so sure it is one of the trained people, treat as unknown")
                                            break
                                    print("Largest ratio so far is {}".format(largest_ratio))
                        else:
                            identity = -1
                    else:
                        print("hhh")
                        identity = -1
                    if identity not in identities:
                        identities.append(identity)
                    if identity in self.tracked_list_of_ppl:
                        self.tracked_list_of_ppl.remove(identity)
                    identitylist.append(identity)
                    replist.append(rep)
                    scorelist.append(BestMatchSimilarityScore)
                    if not self.training:
                        start = time.time()
                        # Determine the name to display
                        if identity == -1:
                            # if len(self.people) == 1:
                            #     name = self.people[0]
                            # else:
                            name = "Unknown"
                        else:
                            name = self.people[self.identity_ofppl.index(identity)]  #+ ", " + str(round_to_1(prob[0]*100)) + "%"
                        print("[{}] is detected! its identity is {}".format(name, identity))
                        # NameAndBB.append((name, bb.left()*inv_scale, bb.top()*inv_scale, (bb.right()-bb.left())*inv_scale,
                        #                   (bb.bottom()-bb.top())*inv_scale, identity, isNewlyDetect, BestMatchSimilarityScore))
        # end bbs for loop
        #if self.stop_all_tracking:
        #    self.stop_all_tracking = False
        # save this frame's bbs and identity/rep info for tracking in the next frame
        if (not self.training or self.training and self.unknowntraining) and len(bbs) > 0 and not StopUpdatePrev:
            # Make tracking easier in unknowntraining mode, even if one frame misses the target bb
            if self.unknowntraining:
                for idx, val in enumerate(self.prev_identity):
                    if val not in identitylist and val > -1:
                        if val not in self.rescuetimes or (val in self.rescuetimes and self.rescuetimes[val] < 30):
                            bbs.append(self.prev_bbs[idx])
                            identitylist.append(self.prev_identity[idx])
                            replist.append(self.prev_rep[idx])
                            scorelist.append(self.prev_score[idx])
                            if val not in self.rescuetimes:
                                self.rescuetimes[val] = 1
                            else:
                                self.rescuetimes[val] += 1
                            print("in unknowntraining mode, rescue identity {} for the {} time".format(val, self.rescuetimes[val]))
                        else:
                            print("in unknowntraining mode, cannot rescue identity {} anymore".format(val))
                    elif val in identitylist and val > -1:
                        self.rescuetimes[val] = 0
            self.prev_bbs = bbs
            self.prev_identity = identitylist
            self.prev_rep = replist
            self.prev_score = scorelist
            #has_prev = True
        # finally, send identities and the annotated msg to the client
        if not self.training:
            start = time.time()
            # don't send the identities msg too often, since there is no need
            if self.counter % 10 == 0:
                msg = {"type": "IDENTITIES", "identities": identities}
                self.sendMessage(json.dumps(msg))
            # if args.verbose:
            #     print("Send back the IDENTITIES took {} seconds.".format(time.time() - start))
            #     start = time.time()
        self.lastframetime = time.time() - framestart
        if args.verbose:
            print("One frame took {} seconds. fps= {}".format(self.lastframetime, 1 / self.lastframetime))
        #else:
        #    print("Skip frame")
        print("==================================================================")
        msg = {
            "type": "ANNOTATED",
            "content": NameAndBB,
            "fps": round_to_1(1 / self.lastframetime)
        }
        self.sendMessage(json.dumps(msg))
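Determine_identity_by_rep is called in the earlier variants but never shown; the inlined identity logic in the example above suggests its shape. A reconstruction under that assumption (thresholds mirror the inline code; this is not the original method):

import math
import numpy as np

def Determine_identity_by_rep(self, rep):
    # Returns (identity, similarity in [0, 1]); identity -1 means unknown.
    if not self.people:
        return (-1, 0.0)
    identity = self.svm.predict(rep)[0]
    prob = self.svm.predict_proba(rep)[0]
    if max(prob) < 0.8:
        return (-1, 0.0)  # SVM not confident enough
    diff = np.absolute(self.mean[identity] - rep)
    score = round_to_1(math.exp(-np.linalg.norm(diff)))  # 1 = best match, 0 = poorest
    # Reject when any embedding dimension strays too far from the class mean.
    for idx, std in enumerate(self.std[identity]):
        if diff[idx] / std > 5:
            return (-1, 0.0)
    return (identity, score)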