def main():
    dataset = HomoAdapDataset("/home/ai/Code/Data/coco/unlabeled2017/")
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=4,
        shuffle=False,
        num_workers=1,
        pin_memory=True)

    loss_fn = UnsuperLoss()
    model = SiameseUnsuperPoint()
    model.train()
    optimizer = torch.optim.Adam(model.parameters())

    for inputs in loader:
        print(list(inputs.keys()))
        with torch.enable_grad():
            loss, data = utils.forward_pass(model, loss_fn, inputs)
        print(loss.item())
        brute_force_match(data["A"]["F"], data["B"]["F"])
        utils.backward_pass(optimizer, loss)
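
# brute_force_match is not defined in this section. A minimal sketch of what a
# mutual-nearest-neighbor descriptor matcher like it might do, assuming FA and
# FB are (B, D, N) descriptor tensors; the name, shapes, and return convention
# are assumptions, not taken from the repo.
import torch

def brute_force_match_sketch(FA, FB):
    """Mutual nearest-neighbor matching between two descriptor sets.

    Assumes FA, FB have shape (B, D, N). Returns, for each descriptor in FA,
    the index of its match in FB where the match is mutual, else -1.
    """
    # Pairwise euclidean distances: (B, N_A, N_B)
    dists = torch.cdist(FA.transpose(1, 2), FB.transpose(1, 2))
    ab = dists.argmin(dim=2)  # best FB index for each FA descriptor
    ba = dists.argmin(dim=1)  # best FA index for each FB descriptor
    idx = torch.arange(FA.shape[2], device=FA.device).unsqueeze(0)
    mutual = ba.gather(1, ab) == idx  # keep only mutual nearest neighbors
    return torch.where(mutual, ab, torch.full_like(ab, -1))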
def _step_fn(self, step, inputs):
    tgt_i = inputs['tgt_i'].item()

    # Forward pass and loss
    with torch.no_grad():
        loss, data = utils.forward_pass(self.model, self.loss_fn, inputs)

    # New sequence
    if self.prev_tgt_i != tgt_i - 1:
        print("\n" + "=" * 20 + "\nNew sequence\n" + "=" * 20 + "\n")
        self.ates += evaluation.eval_path(self.gt_poses, self.pred_poses)
        self.gt_poses = []
        self.pred_poses = []

    print(f"{step}/{len(self.loader)-1} - {tgt_i}")

    # Always
    poses = data["pose"]
    T_pred = utils.torch_to_numpy(
        geometry.to_homog_matrix(geometry.pose_vec2mat(poses[:, 0])).squeeze(0))
    self.pred_poses.append(T_pred)
    T_gt = utils.torch_to_numpy(data["T"].squeeze(0))[1]
    self.gt_poses.append(T_gt)

    gt_depth = data["gt_sparse"]
    pred_depth = data["depth"][0]
    metrics = evaluation.eval_depth(gt_depth, pred_depth)
    self.metrics = utils.dict_append(self.metrics, metrics)

    self.prev_tgt_i = tgt_i
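
# Minimal sketches of the metric-accumulation helpers used here
# (utils.dict_append) and in the validation step elsewhere in this trainer
# (utils.sum_to_dict); both bodies are assumptions inferred from the call sites.
def dict_append_sketch(acc, metrics):
    """Append each metric value to a per-key list (assumed semantics)."""
    for key, value in metrics.items():
        acc.setdefault(key, []).append(value)
    return acc

def sum_to_dict_sketch(acc, metrics):
    """Accumulate each metric value into a per-key running sum (assumed)."""
    for key, value in metrics.items():
        acc[key] = acc.get(key, 0.0) + value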
def add_face():
    data = {"face_present": False}
    encoding = None

    # CHECK FOR FACE IN THE IMAGE
    valid_face = face_present('saved_image/new.jpg')

    # add user only if there is a face inside the picture
    if valid_face:
        # create image encoding
        # encoding = img_to_encoding('saved_image/new.jpg', model)
        face_img = cv2.imread('saved_image/new.jpg')
        encoding = forward_pass(
            img=face_img,
            session=facenet_persistent_session,
            images_placeholder=images_placeholder,
            embeddings=embeddings,
            phase_train_placeholder=phase_train_placeholder,
            image_size=image_size
        )
        # save the output for sending as json
        data['face_present'] = True
    else:
        # save the output for sending as json
        data['face_present'] = False
        print('No subject detected!')

    return data, encoding
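
# face_present is not defined in this section. A minimal sketch of such a
# check using an OpenCV Haar cascade; the actual project may well use a
# different detector (e.g. MTCNN), so treat this as illustrative only.
import cv2

def face_present_sketch(image_path):
    """Return True if at least one face is detected in the image (assumed)."""
    cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    img = cv2.imread(image_path)
    if img is None:
        return False
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
    return len(faces) > 0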
def predict_image():
    """Gets an image file via POST request, feeds the image to the FaceNet
    model; the resulting embedding is then compared with the embeddings
    database. The image file is not stored.

    An HTML page is then rendered showing the prediction result.
    """
    if request.method == 'POST':
        if 'file' not in request.files:
            return "No file part"

        file = request.files['file']
        filename = file.filename

        if filename == "":
            return "No selected file"

        if file and allowed_file(filename=filename, allowed_set=allowed_set):
            # Read image file as numpy array of RGB dimension
            img = imread(name=file, mode='RGB')
            # Detect and crop a 160 x 160 image containing a human face in the image file
            img = get_face(img=img, pnet=pnet, rnet=rnet, onet=onet,
                           image_size=image_size)

            # If a human face is detected
            if img is not None:
                embedding = forward_pass(
                    img=img,
                    session=facenet_persistent_session,
                    images_placeholder=images_placeholder,
                    embeddings=embeddings,
                    phase_train_placeholder=phase_train_placeholder,
                    image_size=image_size)

                embedding_dict = load_embeddings()
                if embedding_dict:
                    # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                    identity = identify_face(embedding=embedding,
                                             embedding_dict=embedding_dict)
                    return render_template('predict_result.html', identity=identity)
                else:
                    return render_template(
                        'predict_result.html',
                        identity="No embedding files detected! Please upload image files for embedding!")
            else:
                return render_template(
                    'predict_result.html',
                    identity="Operation was unsuccessful! No human face was detected.")
    else:
        return "POST HTTP method required!"
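
# A minimal sketch of an identify_face-style lookup, assuming embedding_dict
# maps names to saved FaceNet embeddings. The 1.1 distance threshold is a
# common FaceNet heuristic, not a value confirmed by this code.
import numpy as np

def identify_face_sketch(embedding, embedding_dict, threshold=1.1):
    """Return the name of the closest stored embedding, or 'Unknown'."""
    best_name, best_dist = "Unknown", float("inf")
    for name, stored in embedding_dict.items():
        dist = np.linalg.norm(embedding - stored)  # euclidean distance
        if dist < best_dist:
            best_name, best_dist = name, dist
    return best_name if best_dist <= threshold else "Unknown"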
def step_fn(step, inputs):
    # Forward pass and loss
    with torch.no_grad():
        loss, data = utils.forward_pass(model, loss_fn, inputs)

    print("loss %f" % loss.item())
    print(data.keys())
    print(data["pose"].shape)
    for i in range(args.batch):
        print(list(data["pose"][i, 0, :].cpu().detach().numpy()))
        print(list(data["pose"][i, 1, :].cpu().detach().numpy()))
        print("--")

    depth_img = viz.tensor2depthimg(torch.cat((*data["depth"][0][:, 0],), dim=0))
    tgt_img = viz.tensor2img(torch.cat((*data["tgt"],), dim=1))
    img = np.concatenate((tgt_img, depth_img), axis=1)

    warp_imgs = []
    #diff_imgs = []
    for warp, diff in zip(data["warp"], data["diff"]):
        warp = restack(restack(warp, 1, -1), 0, -2)
        diff = restack(restack(diff, 1, -1), 0, -2)
        warp_imgs.append(viz.tensor2img(warp))
        #diff_imgs.append(viz.tensor2diffimg(diff))

    world = reconstruction.depth_to_3d_points(data["depth"][0], data["K"])
    points = world[0, :].view(3, -1).transpose(1, 0).cpu().detach().numpy().astype(np.float64)
    colors = (data["tgt"][0, :].view(3, -1).transpose(1, 0).cpu().detach().numpy().astype(np.float64) + 1) / 2

    loop = True
    while loop:
        key = cv2.waitKey(10)
        if key == 27 or pango.ShouldQuit():
            exit()
        elif key != -1:
            loop = False

        cv2.imshow("target and depth", img)
        #for i, (warp, diff) in enumerate(zip(warp_imgs, diff_imgs)):
        for i, warp in enumerate(warp_imgs):
            cv2.imshow("warp scale: %d" % i, warp)
            #cv2.imshow("diff scale: %d" % i, diff)

        gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
        gl.glClearColor(1.0, 1.0, 1.0, 1.0)
        dcam.Activate(scam)

        gl.glPointSize(5)
        pango.DrawPoints(points, colors)

        pose = np.identity(4)
        pose[:3, 3] = 0
        gl.glLineWidth(1)
        gl.glColor3f(0.0, 0.0, 1.0)
        pango.DrawCamera(pose, 0.5, 0.75, 0.8)

        pango.FinishFrame()
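
# restack is not defined here; a minimal sketch consistent with how it is
# called above (take a tensor apart along one dimension and re-concatenate the
# slices along another, e.g. to tile a batch into one image) — assumed
# semantics, inferred from the call sites only.
import torch

def restack_sketch(x, dim, dest_dim):
    """Unbind `x` along `dim` and concatenate the slices along `dest_dim`."""
    return torch.cat(torch.unbind(x, dim=dim), dim=dest_dim)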
def get_frame(self):
    success, frame_orig = self.video.read()

    # We are using Motion JPEG, but OpenCV defaults to capturing raw images,
    # so we must encode the frame into JPEG in order to correctly display
    # the video stream.
    # faces = face_cascade.detectMultiScale(image, 1.3, 5)
    frame = cv2.resize(src=frame_orig, dsize=(0, 0), fx=0.5, fy=0.5)
    embedding_dict = load_embeddings()
    frame = frame[:, :, ::-1]

    if frame.size > 0:
        faces, rects = get_faces_live(img=frame, pnet=pnet, rnet=rnet,
                                      onet=onet, image_size=image_size)

        # If there are human faces detected
        if faces:
            for i in range(len(faces)):
                face_img = faces[i]
                rect = rects[i]

                # Scale coordinates of face locations by the resize ratio
                rect = [coordinate * 2 for coordinate in rect]

                face_embedding = forward_pass(
                    img=face_img,
                    session=facenet_persistent_session,
                    images_placeholder=images_placeholder,
                    embeddings=embeddings,
                    phase_train_placeholder=phase_train_placeholder,
                    image_size=image_size)

                # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                identity = identify_face(embedding=face_embedding,
                                         embedding_dict=embedding_dict)

                cv2.rectangle(img=frame_orig,
                              pt1=(rect[0], rect[1]),
                              pt2=(rect[2], rect[3]),
                              color=(255, 215, 0),
                              thickness=2)

                W = int(rect[2] - rect[0]) // 2
                cv2.putText(img=frame_orig,
                            text=identity,
                            org=(rect[0] + W - (W // 2), rect[1] - 7),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=0.5,
                            color=(255, 215, 0),
                            thickness=1,
                            lineType=cv2.LINE_AA)

    ret, jpeg = cv2.imencode('.jpg', frame_orig)
    return jpeg.tobytes()
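
# load_embeddings is called once per frame above. A minimal sketch of what it
# presumably does (read every .npy file in an embeddings/ folder into a
# name -> embedding dict); the path and layout are assumptions. Loading this
# once outside the frame loop would avoid repeated disk reads.
import os
import numpy as np

def load_embeddings_sketch(embeddings_path='embeddings/'):
    """Load saved .npy embeddings into {name: embedding} (assumed layout)."""
    embedding_dict = {}
    for fname in os.listdir(embeddings_path):
        if fname.endswith('.npy'):
            name = os.path.splitext(fname)[0]
            embedding_dict[name] = np.load(os.path.join(embeddings_path, fname))
    return embedding_dict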
def get_image():
    if request.method == 'POST':
        if 'file' not in request.files:
            return "No file part"

        file = request.files['file']
        filename = file.filename

        if filename == "":
            return "No selected file"

        if file and allowed_file(filename=filename, allowed_set=allowed_set):
            # Read image file as numpy array of RGB dimension
            img = io.imread(fname=file, mode='RGB')

            # Detect and crop a 160 x 160 image containing a human face in the image file
            img = get_face(img=img, pnet=pnet, rnet=rnet, onet=onet,
                           image_size=image_size)

            # If a human face is detected
            if img is not None:
                embedding = forward_pass(
                    img=img,
                    session=facenet_persistent_session,
                    images_placeholder=images_placeholder,
                    embeddings=embeddings,
                    phase_train_placeholder=phase_train_placeholder,
                    image_size=image_size)

                # Save cropped face image to 'uploads/' folder
                save_image(img=img, filename=filename, uploads_path=uploads_path)

                # Remove file extension from image filename; numpy file storage
                # is based on the image filename
                filename = remove_file_extension(filename=filename)

                # Save embedding to 'embeddings/' folder
                save_embedding(embedding=embedding, filename=filename,
                               embeddings_path=embeddings_path)

                return render_template(
                    "upload_result.html",
                    status="Image uploaded and embedded successfully!")
            else:
                return render_template(
                    "upload_result.html",
                    status="Image upload was unsuccessful! No human face was detected.")
    else:
        return "POST HTTP method required!"
def get_image():
    """Gets an image file via POST request, feeds the image to the FaceNet
    model, then saves both the original image and its resulting embedding
    from the FaceNet model in their designated folders.

        'uploads' folder: for image files
        'embeddings' folder: for embedding numpy files
    """
    if request.method == 'POST':
        if 'file' not in request.files:
            return render_template("warning.html",
                                   status="No 'file' field in POST request!")

        file = request.files['file']
        filename = file.filename

        if filename == "":
            return render_template("warning.html", status="No selected file!")

        if file and allowed_file(filename=filename, allowed_set=allowed_set):
            filename = secure_filename(filename=filename)

            # Read image file as numpy array of RGB dimension
            img = imread(name=file, mode='RGB')

            # Detect and crop a 160 x 160 image containing a human face in the image file
            img = get_face(img=img, pnet=pnet, rnet=rnet, onet=onet,
                           image_size=image_size)

            # If a human face is detected
            if img is not None:
                embedding = forward_pass(
                    img=img,
                    session=facenet_persistent_session,
                    images_placeholder=images_placeholder,
                    embeddings=embeddings,
                    phase_train_placeholder=phase_train_placeholder,
                    image_size=image_size)

                # Save cropped face image to 'uploads/' folder
                save_image(img=img, filename=filename, uploads_path=uploads_path)

                # Remove file extension from image filename; numpy file storage
                # is based on the image filename
                filename = remove_file_extension(filename=filename)

                # Save embedding to 'embeddings/' folder
                save_embedding(embedding=embedding, filename=filename,
                               embeddings_path=embeddings_path)

                return render_template(
                    "upload_result.html",
                    status="Image uploaded and embedded successfully!")
            else:
                return render_template(
                    "upload_result.html",
                    status="Image upload was unsuccessful! No human face was detected!")
    else:
        return render_template("warning.html",
                               status="POST HTTP method required!")
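
# Minimal sketches of the two filename/embedding helpers used above; both
# bodies are assumptions inferred from the call sites (strip the extension,
# then persist the embedding as a .npy file named after the image).
import os
import numpy as np

def remove_file_extension_sketch(filename):
    """Strip the extension so the .npy file is named after the image (assumed)."""
    return os.path.splitext(filename)[0]

def save_embedding_sketch(embedding, filename, embeddings_path):
    """Persist an embedding as '<embeddings_path>/<filename>.npy' (assumed)."""
    np.save(os.path.join(embeddings_path, filename), embedding)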
def _step_fn(self, step, inputs):
    # Forward pass and loss
    with torch.no_grad():
        loss, data = utils.forward_pass(self.model, self.loss_fn, inputs)

    print(list(data.keys()))
    print(f"loss {loss.item():.3f}")

    self._debug_step(loss, data)
def predict_image(file):
    # file = request.files['file']
    # file = os.path.join(APP_ROOT, 'uploads/Abdulrahman Safh.png')

    # Read image file as numpy array of RGB dimension
    #img = io.imread(fname=file)
    img = imread(name=file, mode='RGB')

    # Detect and crop a 160 x 160 image containing a human face in the image file
    faces, rects = get_faces_live(img=img, pnet=pnet, rnet=rnet, onet=onet,
                                  image_size=image_size)
    #global d

    # If there are human faces detected
    if faces:
        embedding_dict = load_embeddings()
        if embedding_dict:
            people_found = []
            for i in range(len(faces)):
                face_img = faces[i]
                rect = rects[i]

                face_embedding = forward_pass(
                    img=face_img,
                    session=facenet_persistent_session,
                    images_placeholder=images_placeholder,
                    embeddings=embeddings,
                    phase_train_placeholder=phase_train_placeholder,
                    image_size=image_size)

                # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                identity = identify_face(embedding=face_embedding,
                                         embedding_dict=embedding_dict)
                people_found.append(identity)

                cv2.rectangle(img, (rect[0], rect[1]), (rect[2], rect[3]),
                              (0, 255, 0), 3)
                W = int(rect[2] - rect[0]) // 2
                H = int(rect[3] - rect[1]) // 2
                cv2.putText(img, identity,
                            (rect[0] + W - (W // 2), rect[1] - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 1,
                            cv2.LINE_AA)

                # code for saving the output images
                # cv2.imwrite("SavedImgesFull/file_%d.jpg" % d, img)
                #d += 1

            return people_found
        else:
            # return ["No Face"]
            return None
            # return render_template(
            #     'predict_result.html',
            #     identity="No embedding files detected! Please upload image files for embedding!"
            # )
    else:
        # return ["No Image"]
        return None
def mulProcessor():
    """ Processor node for multiplication operation """
    l, m, n = Input(), Input(), Input()

    f2 = Mul(l, m, n)
    feed_dict2 = {l: 4, m: 5, n: 10}

    sorted_nodes2 = topological_sort(feed_dict2)
    output2 = forward_pass(f2, sorted_nodes2)

    print("{} * {} * {} = {} (according to miniflow - mul)".format(
        feed_dict2[l], feed_dict2[m], feed_dict2[n], output2))
def addProcessor():
    """ Processor node for add operation """
    x, y, z = Input(), Input(), Input()

    f1 = Add(x, y, z)
    feed_dict1 = {x: 4, y: 5, z: 10}

    sorted_nodes1 = topological_sort(feed_dict1)
    output1 = forward_pass(f1, sorted_nodes1)

    print("{} + {} + {} = {} (according to miniflow - add)".format(
        feed_dict1[x], feed_dict1[y], feed_dict1[z], output1))
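
# For context: in the MiniFlow-style API these processors use, topological_sort
# returns the graph's nodes ordered so every node comes after its inputs, and
# forward_pass runs each node's forward() in that order before reading the
# output node's value. A minimal sketch, assumed to match the exercise's
# conventions:
def forward_pass_sketch(output_node, sorted_nodes):
    """Run each node's forward() in topological order, then read the result."""
    for node in sorted_nodes:
        node.forward()
    return output_node.value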
def step_fn(step, inputs):
    # Forward pass and loss
    with torch.no_grad():
        loss, data = utils.forward_pass(model, loss_fn, inputs)

    print("loss %f" % loss.item())
    print(data.keys())
    print(data["pose"].shape)
    for i in range(4):
        print(list(data["pose"][i, 0, :].cpu().detach().numpy()))
        print(list(data["pose"][i, 1, :].cpu().detach().numpy()))
        print("--")

    depth_img = viz.tensor2depthimg(torch.cat((*data["depth"][0][:, 0],), dim=0))
    tgt_img = viz.tensor2img(torch.cat((*data["tgt"],), dim=1))
    img = np.concatenate((tgt_img, depth_img), axis=1)

    warp_imgs = []
    diff_imgs = []
    for warp, diff in zip(data["warp"], data["diff"]):
        warp = restack(restack(warp, 1, -1), 0, -2)
        diff = restack(restack(diff, 1, -1), 0, -2)
        warp_imgs.append(viz.tensor2img(warp))
        diff_imgs.append(viz.tensor2diffimg(diff))

    world = inverse_warp.depth_to_3d_points(data["depth"][0], data["K"])
    points = world[0, :].view(3, -1).transpose(1, 0).cpu().detach().numpy().astype(np.float64)
    colors = (data["tgt"][0, :].view(3, -1).transpose(1, 0).cpu().detach().numpy().astype(np.float64) + 1) / 2

    point_cloud.points = o3d.open3d.Vector3dVector(points)
    point_cloud.colors = o3d.open3d.Vector3dVector(colors)
    vis.add_geometry(point_cloud)

    loop = True
    while loop:
        key = cv2.waitKey(10)
        if key == 27:
            exit()
        elif key != -1:
            loop = False

        vis.update_geometry()
        vis.poll_events()
        vis.update_renderer()

        cv2.imshow("target and depth", img)
        for i, (warp, diff) in enumerate(zip(warp_imgs, diff_imgs)):
            cv2.imshow("warp scale: %d" % i, warp)
            cv2.imshow("diff scale: %d" % i, diff)
def __train_step_fn(self, step, inputs):
    # Forward pass and loss
    with torch.enable_grad():
        loss, data = utils.forward_pass(self.model, self.loss_fn, inputs)

    # Backward pass
    utils.backward_pass(self.optimizer, loss)

    # Save loss
    self.running_loss += loss.item()

    # Log information
    if utils.is_interval(step, self.LOG_INTERVAL):
        if self.SHOULD_VALIDATE:
            val_metrics, train_metrics = self.__validate()

        N_steps = len(self.loaders["train"])
        percent = 100 * step / N_steps
        avg_loss = self.running_loss / self.LOG_INTERVAL
        self.running_loss = 0.0
        t_sample = (time.time() - self.epoch_ts) / step / self.BATCH
        t_sample_ms = 1000 * t_sample
        eta = t_sample * self.BATCH * (N_steps - step)
        samples = (self.epoch * N_steps + step) * self.BATCH

        print(
            f"Epoch {self.epoch+1}/{self.EPOCHS} ({percent:3.0f}%, eta: {utils.sec_to_hms(eta)}) "
            + f"| {samples:5} samples | {t_sample_ms:.0f} ms/sample -> loss: {avg_loss:.3f}")

        if self.SHOULD_WRITE:
            self.writer.add_scalar("loss", scalar_value=avg_loss,
                                   global_step=samples)
            if self.SHOULD_VALIDATE:
                for key in val_metrics.keys():
                    if key == "abs_rel":
                        self.writer.add_scalar(f"val/{key}",
                                               scalar_value=val_metrics[key],
                                               global_step=samples)
                        self.writer.add_scalar(f"train/{key}",
                                               scalar_value=train_metrics[key],
                                               global_step=samples)
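
# utils.backward_pass and utils.is_interval are project helpers not shown in
# this section. Minimal sketches of the standard PyTorch pattern they
# presumably wrap (assumed, not taken from the repo):
def backward_pass_sketch(optimizer, loss):
    """Standard PyTorch optimizer step: zero grads, backprop, update."""
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

def is_interval_sketch(step, interval):
    """True every `interval` steps, skipping step 0 (assumed semantics)."""
    return step > 0 and step % interval == 0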
def linearProcessor():
    """ Processor node for linear operation """
    inputs, weights, bias = Input(), Input(), Input()

    f = Linear(inputs, weights, bias)

    feed_dict = {
        inputs: [6, 14, 3],
        weights: [0.5, 0.25, 1.4],
        bias: 2
    }

    graph = topological_sort(feed_dict)
    output = forward_pass(f, graph)

    print(output, "(according to miniflow - linear)")
def linearMatrixProcessor():
    """ Processor node for linear matrix operation """
    X, W, b = Input(), Input(), Input()

    f = LinearMatrix(X, W, b)

    X_ = np.array([[-1., -2.], [-1, -2]])
    W_ = np.array([[2., -3], [2., -3]])
    b_ = np.array([-3., -5])

    feed_dict = {X: X_, W: W_, b: b_}

    graph = topological_sort(feed_dict)
    output = forward_pass(f, graph)

    """
    Output should be:
    [[-9., 4.],
     [-9., 4.]]
    """
    print(output, "(according to miniflow - LinearMatrix)")
def linearSigmoidProcessor():
    """ Processor node for linear and sigmoid operation """
    X, W, b = Input(), Input(), Input()

    f = LinearMatrix(X, W, b)
    g = Sigmoid(f)

    X_ = np.array([[-1., -2.], [-1, -2]])
    W_ = np.array([[2., -3], [2., -3]])
    b_ = np.array([-3., -5])

    feed_dict = {X: X_, W: W_, b: b_}

    graph = topological_sort(feed_dict)
    output = forward_pass(g, graph)

    """
    Output should be:
    [[  1.23394576e-04   9.82013790e-01]
     [  1.23394576e-04   9.82013790e-01]]
    """
    print(output, "(according to miniflow - LinearSigmoid)")
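
# A minimal sketch of the Sigmoid node used above, following the usual
# MiniFlow node convention; a Node base class with inbound_nodes and a value
# attribute is assumed rather than taken from this code.
import numpy as np

class SigmoidSketch:
    """Applies 1 / (1 + exp(-x)) to the value of its single input node."""
    def __init__(self, node):
        self.inbound_nodes = [node]
        self.value = None

    def forward(self):
        x = self.inbound_nodes[0].value
        self.value = 1. / (1. + np.exp(-x))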
def detail():
    '''Detects text and face in an Aadhaar card.'''
    if request.method == 'POST':
        # saving current timestamp
        current_time = str(datetime.datetime.now()).replace('-', '_').replace(':', '_')

        # The type of image, i.e. Front or Back image
        image_type1 = 'Front'
        image_type2 = 'Back'

        # Path for Front image and the face image that will be cropped
        filename1 = uploads_path + image_type1 + '/' + current_time + '.jpg'
        photo_path = uploads_path + image_type1 + '/' + 'faces' + '/' + current_time + '.png'

        # Path for Back image and the face image that will be cropped
        filename2 = uploads_path + image_type2 + '/' + current_time + '.jpg'
        crop_path = uploads_path + image_type2 + '/temp/' + current_time + '.png'

        # if the Front folder (in uploads) doesn't already exist, create it
        if not os.path.exists(uploads_path + image_type1):
            os.mkdir(uploads_path + image_type1)
            # directory for saving faces in the id cards
            os.mkdir(uploads_path + image_type1 + '/' + 'faces')

        # if the Back folder (in uploads) doesn't already exist, create it
        if not os.path.exists(uploads_path + image_type2):
            os.mkdir(uploads_path + image_type2)
            os.mkdir(uploads_path + image_type2 + '/temp')

        # variable to store details extracted from card
        details = {}

        # get Front Card Photo from user
        photo1 = request.files['photo-front']
        photo1.save(filename1)

        # get Back Card Photo from user
        photo2 = request.files['photo-back']
        photo2.save(filename2)

        print("Processing Front Image ......")
        # Process The Front Card Image
        data, photo_path = recognise_text(filename1, photo_path)
        details = get_labels_from_aadhar(data)
        print("Processing Front Image ...... DONE")

        print("Processing Back Image .......")
        # Process The Back Card Image
        crop_aadhar(filename2, crop_path)
        data2, photo_path2 = recognise_text(crop_path, 'none')
        details.update(get_address(data2))
        print("Processing Back Image ....... DONE")
        os.remove(crop_path)

        data_dict = {
            'status': True,
            'fields': details,
            'image_path_front': filename1,
            'image_path_back': filename2,
            'photo_path': photo_path
        }

        print("save into json files")
        # the json file where the output must be stored
        with open('myfile.json', 'a+') as out_file:
            json.dump(data_dict, out_file, indent=6)

        img = imread(name=photo_path, mode='RGB')

        print("Processing Face Image .......")
        # Detect and crop a 160 x 160 image containing a human face in the image file
        img = get_face(img=img, pnet=pnet, rnet=rnet, onet=onet,
                       image_size=image_size)
        embedding = forward_pass(
            img=img,
            session=facenet_persistent_session,
            images_placeholder=images_placeholder,
            embeddings=embeddings,
            phase_train_placeholder=phase_train_placeholder,
            image_size=image_size)
        print("Processing Face Image ....... DONE")

        # Save the face embedding under the name of the person
        filename = data_dict['fields']['Name']
        filename = secure_filename(filename=filename)

        # Save embedding to 'embeddings/' folder
        save_embedding(embedding=embedding, filename=filename,
                       embeddings_path=embeddings_path)

        # Write the raw and cleaned text detected from the card
        with open('outputs.txt', 'a+') as f:
            f.write("##########################################################################\n\n")
            f.write('######################## Raw Output for Front Card Image #############################\n\n')
            for value in data:
                f.write(str(value) + '\n')
            f.write("##########################################################################\n\n")
            f.write('######################## Raw Output for Back Card Image #############################\n\n')
            for value in data2:
                f.write(str(value) + '\n')
            f.write('\n\n######################## Cleaned Output #############################\n\n')
            for key, value in details.items():
                f.write(str(key) + ' : ' + str(value) + '\n')
            f.write("##########################################################################\n\n")

        return jsonify(data_dict)
    else:
        # if not POST, terminate
        return jsonify({'status': False})
def face_detect_live():
    """Detects faces in real-time via Web Camera."""
    embedding_dict = load_embeddings()
    if embedding_dict:
        try:
            # Start non-blocking multi-threaded OpenCV video stream
            cap = WebcamVideoStream(src=0).start()

            while True:
                frame_orig = cap.read()  # Read frame

                # Resize frame to half its size for faster computation
                frame = cv2.resize(src=frame_orig, dsize=(0, 0), fx=0.5, fy=0.5)

                # Convert the image from BGR color (which OpenCV uses) to RGB color
                frame = frame[:, :, ::-1]

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                if frame.size > 0:
                    faces, rects = get_faces_live(img=frame, pnet=pnet,
                                                  rnet=rnet, onet=onet,
                                                  image_size=image_size)

                    # If there are human faces detected
                    if faces:
                        for i in range(len(faces)):
                            face_img = faces[i]
                            rect = rects[i]

                            # Scale coordinates of face locations by the resize ratio
                            rect = [coordinate * 2 for coordinate in rect]

                            face_embedding = forward_pass(
                                img=face_img,
                                session=facenet_persistent_session,
                                images_placeholder=images_placeholder,
                                embeddings=embeddings,
                                phase_train_placeholder=phase_train_placeholder,
                                image_size=image_size)

                            # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                            identity = identify_face(embedding=face_embedding,
                                                     embedding_dict=embedding_dict)

                            cv2.rectangle(img=frame_orig,
                                          pt1=(rect[0], rect[1]),
                                          pt2=(rect[2], rect[3]),
                                          color=(255, 215, 0),
                                          thickness=2)

                            W = int(rect[2] - rect[0]) // 2
                            cv2.putText(img=frame_orig,
                                        text=identity,
                                        org=(rect[0] + W - (W // 2), rect[1] - 7),
                                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                        fontScale=0.5,
                                        color=(255, 215, 0),
                                        thickness=1,
                                        lineType=cv2.LINE_AA)

                    # Keep showing camera stream even if no human faces are detected
                    cv2.imshow(winname='Video', mat=frame_orig)
                else:
                    continue

            cap.stop()  # Stop multi-threaded Video Stream
            cv2.destroyAllWindows()

            return render_template(template_name_or_list='index.html')
        except Exception as e:
            print(e)
    else:
        return render_template(
            template_name_or_list="warning.html",
            status="No embedding files detected! Please upload image files for embedding!")
def face_detect_live():
    # Load text reading engine
    #engine = pyttsx3.init()

    spoken_face_names = []
    greetings = [
        'How do you do', 'Hello', 'Hi', 'Hai', 'Hey', 'How have you been',
        'How are you', 'How is it going', 'Salam alikom', 'Esh loonak ya',
        'Ahlaaaan'
    ]

    embedding_dict = load_embeddings()
    if embedding_dict:
        try:
            cap = cv2.VideoCapture(0)

            while True:
                return_code, frame = cap.read()  # BGR frame (OpenCV default)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                faces, rects = get_faces_live(img=frame, pnet=pnet, rnet=rnet,
                                              onet=onet, image_size=image_size)

                # If there are human faces detected
                if faces:
                    for i in range(len(faces)):
                        face_img = faces[i]
                        rect = rects[i]

                        face_embedding = forward_pass(
                            img=face_img,
                            session=facenet_persistent_session,
                            images_placeholder=images_placeholder,
                            embeddings=embeddings,
                            phase_train_placeholder=phase_train_placeholder,
                            image_size=image_size)

                        # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                        identity = identify_face(embedding=face_embedding,
                                                 embedding_dict=embedding_dict)

                        cv2.rectangle(frame, (rect[0], rect[1]),
                                      (rect[2], rect[3]), (255, 215, 0), 2)

                        W = int(rect[2] - rect[0]) // 2
                        H = int(rect[3] - rect[1]) // 2
                        cv2.putText(frame, identity,
                                    (rect[0] + W - (W // 2), rect[1] - 7),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (255, 215, 0), 1, cv2.LINE_AA)

                        # Greet each recognized person once
                        if identity == "Unknown":
                            continue
                        elif identity in spoken_face_names:
                            continue
                        else:
                            print(random.choice(greetings) + " " + identity)
                            #engine.say(random.choice(greetings) + name)
                            #engine.runAndWait()
                            spoken_face_names.append(identity)
                            continue

                    cv2.imshow('Video', frame)
                else:
                    continue

            cap.release()
            cv2.destroyAllWindows()

            return render_template('index.html')
        except Exception as e:
            print(e)
    else:
        return "No loaded faces detected! Please upload image files for embedding!"
def face_detect_live():
    """Detects faces in real-time via Web Camera."""
    embedding_dict = load_embeddings()
    if embedding_dict:
        try:
            cap = cv2.VideoCapture(0)

            while True:
                # For use in multi-camera environments when the cameras do
                # not have hardware synchronization
                cap.grab()

                return_code, frame_orig = cap.read()  # Read frame

                # Resize frame to half its size for faster computation
                frame = cv2.resize(frame_orig, (0, 0), fx=0.5, fy=0.5)

                # Convert the image from BGR color (which OpenCV uses) to RGB color
                frame = frame[:, :, ::-1]

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                if frame.size > 0:
                    faces, rects = get_faces_live(img=frame, pnet=pnet,
                                                  rnet=rnet, onet=onet,
                                                  image_size=image_size)

                    # If there are human faces detected
                    if faces:
                        for i in range(len(faces)):
                            face_img = faces[i]
                            rect = rects[i]

                            # Scale coordinates of face locations by the resize ratio
                            rect = [coordinate * 2 for coordinate in rect]

                            face_embedding = forward_pass(
                                img=face_img,
                                session=facenet_persistent_session,
                                images_placeholder=images_placeholder,
                                embeddings=embeddings,
                                phase_train_placeholder=phase_train_placeholder,
                                image_size=image_size)

                            # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                            identity = identify_face(embedding=face_embedding,
                                                     embedding_dict=embedding_dict)

                            cv2.rectangle(frame_orig, (rect[0], rect[1]),
                                          (rect[2], rect[3]), (255, 215, 0), 2)

                            W = int(rect[2] - rect[0]) // 2
                            H = int(rect[3] - rect[1]) // 2
                            cv2.putText(frame_orig, identity,
                                        (rect[0] + W - (W // 2), rect[1] - 7),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                        (255, 215, 0), 1, cv2.LINE_AA)

                    # Keep showing camera stream even if no human faces are detected
                    cv2.imshow('Video', frame_orig)
                else:
                    continue

            cap.release()
            cv2.destroyAllWindows()

            return render_template('index.html')
        except Exception as e:
            print(e)
    else:
        return render_template(
            "warning.html",
            status="No embedding files detected! Please upload image files for embedding!")
def _step_fn(self, step, inputs):
    # Forward pass and loss
    with torch.no_grad():
        loss, data = utils.forward_pass(self.model, self.loss_fn, inputs)

    print(f"loss {loss.item():.3f}")
    for i in range(data["pose"].shape[1]):
        pose = list(data["pose"][0, i, :].cpu().detach().numpy())
        #print("pose %d -> x: %.6f, y: %.6f, z: %.6f, rx: %.6f, ry: %.6f, rz: %.6f" % (i, *pose))

    poses = data["pose"]
    T0 = utils.torch_to_numpy(
        geometry.to_homog_matrix(geometry.pose_vec2mat(poses[:, 1])).squeeze(0))
    T1 = np.identity(4)
    T1[:3, 3] = 0
    T2 = utils.torch_to_numpy(
        geometry.to_homog_matrix(geometry.pose_vec2mat(poses[:, 0])).squeeze(0))

    T_gt = utils.torch_to_numpy(data["T"].squeeze(0))
    T0_gt = T_gt[0]
    T1_gt = np.identity(4)
    T1_gt[:3, 3] = 0
    T2_gt = T_gt[1]

    Ta, Tb, Tc = T0.copy(), T1.copy(), T2.copy()
    Ta_gt, Tb_gt, Tc_gt = T0_gt.copy(), T1_gt.copy(), T2_gt.copy()

    # Trajectory
    if self.prev_tgt_i != data["tgt_i"] - 1 or self.scale is None:
        # New sequence!
        self.positions = []
        self.positions_gt = []
        self.scale = np.linalg.norm(Tc_gt[:3, -1] - Ta_gt[:3, -1]) \
            / np.linalg.norm(Tc[:3, -1] - Ta[:3, -1])
    self.prev_tgt_i = data["tgt_i"]

    Ta_gt[:3, -1] /= self.scale
    Tc_gt[:3, -1] /= self.scale
    print(Tc_gt)
    print(Tc)

    if len(self.positions) == 0:
        self.positions = [Ta, Tb, Tc]
        self.positions_gt = [Ta_gt, Tb_gt, Tc_gt]
    else:
        inv = np.linalg.pinv(self.positions[-1])
        self.positions = [inv @ T for T in self.positions]
        self.positions.append(Tc)
        inv_gt = np.linalg.pinv(self.positions_gt[-1])
        self.positions_gt = [inv_gt @ T_gt for T_gt in self.positions_gt]
        self.positions_gt.append(Tc_gt)

    # Debug images
    depth_img = viz.tensor2depthimg(data["depth"][0][0, 0])
    tgt_img = viz.tensor2img(data["tgt"][0])
    img = np.concatenate((tgt_img, depth_img), axis=1)
    tgtrefs = viz.tensor2img(
        torch.cat((data["refs"][0, 0], data["tgt"][0], data["refs"][0, 1]), dim=1))

    points, colors = to_points_3d(data["tgt"][0], data["depth"][0], data["K"])

    loop = True
    while loop:
        key = cv2.waitKey(10)
        if key == 27 or self.renderer.should_quit():
            exit()
        elif key != -1:
            loop = False

        cv2.imshow("target and depth", img)
        cv2.imshow("target and refs", tgtrefs)

        self.renderer.clear_screen()
        self.renderer.draw_points(points, colors)

        line = [T[:3, 3] for T in self.positions]
        line_gt = [T[:3, 3] for T in self.positions_gt]
        self.renderer.draw_line(line, color=(1., 0., 0.))
        self.renderer.draw_line(line_gt, color=(0., 1., 0.))
        #self.renderer.draw_cameras([T0], color=(1.,0.,0.))
        #self.renderer.draw_cameras([T1], color=(0.,1.,0.))
        #self.renderer.draw_cameras([T2], color=(0.,0.,1.))

        self.renderer.finish_frame()
def predict():
    # dictionary that will be returned as the JSON response
    data = {"success": False}
    # for keeping track of authentication status
    data['authenticate'] = False

    # ensure an image was properly uploaded to our endpoint
    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            # read the image in PIL format
            image = flask.request.files["image"].read()
            image = np.array(Image.open(io.BytesIO(image)))

            # save the image on the server side
            cv2.imwrite('saved_image/new.jpg',
                        cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

            # CHECK FOR FACE IN THE IMAGE
            valid_face = face_present('saved_image/new.jpg')

            # do facial recognition only when there is a face inside the frame
            if valid_face:
                # find the image encoding and see if the image is of a registered user or not
                # encoding = img_to_encoding('saved_image/new.jpg', model)
                face_img = cv2.imread('saved_image/new.jpg')
                encoding = forward_pass(
                    img=face_img,
                    session=facenet_persistent_session,
                    images_placeholder=images_placeholder,
                    embeddings=embeddings,
                    phase_train_placeholder=phase_train_placeholder,
                    image_size=image_size)

                min_dist, identity, authenticate = face_recognition(
                    encoding, user_db, model, threshold=0.9)

                # save the output for sending as json
                data["min_dist"] = str(min_dist)
                data['email'] = identity
                if identity != 'Unknown Person':
                    data['name'] = user_db[identity]['name']
                else:
                    data['name'] = 'Unknown Person'
                data['face_present'] = True
                data['authenticate'] = authenticate
            else:
                # save the output for sending as json
                data["min_dist"] = 'NaN'
                data['identity'] = 'NaN'
                data['name'] = 'NaN'
                data['face_present'] = False
                data['authenticate'] = False
                print('No subject detected!')

            # indicate that the request was a success
            data["success"] = True

    # create a new session
    Session = sessionmaker(bind=engine)
    s = Session()

    # check if the user is logged in
    if data['authenticate']:
        session['logged_in'] = True
    else:
        flash('Unknown Person!')

    # return the data dictionary as a JSON response
    return flask.jsonify(data)
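
# A minimal sketch of a face_recognition-style helper matching the call above:
# it returns (min_dist, identity, authenticate). The user_db layout and the
# unused model argument are assumptions inferred from the call site only.
import numpy as np

def face_recognition_sketch(encoding, user_db, model, threshold=0.9):
    """Find the closest registered user; authenticate if within threshold."""
    min_dist, identity = float("inf"), 'Unknown Person'
    # assumed layout: {email: {'encoding': ndarray, 'name': str}}
    for email, entry in user_db.items():
        dist = np.linalg.norm(encoding - entry['encoding'])
        if dist < min_dist:
            min_dist, identity = dist, email
    authenticate = min_dist <= threshold
    if not authenticate:
        identity = 'Unknown Person'
    return min_dist, identity, authenticate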
def face_detect_live():
    """Detects faces in real-time via Web Camera."""
    embedding_dict = load_embeddings()
    if embedding_dict:
        try:
            cap = cv2.VideoCapture(0)

            while True:
                return_code, frame = cap.read()  # BGR frame (OpenCV default)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                if frame.size > 0:
                    faces, rects = get_faces_live(img=frame, pnet=pnet,
                                                  rnet=rnet, onet=onet,
                                                  image_size=image_size)

                    # If there are human faces detected
                    if faces:
                        for i in range(len(faces)):
                            face_img = faces[i]
                            rect = rects[i]

                            face_embedding = forward_pass(
                                img=face_img,
                                session=facenet_persistent_session,
                                images_placeholder=images_placeholder,
                                embeddings=embeddings,
                                phase_train_placeholder=phase_train_placeholder,
                                image_size=image_size)

                            # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                            identity = identify_face(embedding=face_embedding,
                                                     embedding_dict=embedding_dict)

                            cv2.rectangle(frame, (rect[0], rect[1]),
                                          (rect[2], rect[3]), (255, 215, 0), 2)

                            W = int(rect[2] - rect[0]) // 2
                            H = int(rect[3] - rect[1]) // 2
                            cv2.putText(frame, identity,
                                        (rect[0] + W - (W // 2), rect[1] - 7),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                        (255, 215, 0), 1, cv2.LINE_AA)

                    # Keep showing camera stream even if no human faces are detected
                    cv2.imshow('Video', frame)
                else:
                    continue

            cap.release()
            cv2.destroyAllWindows()

            return render_template('index.html')
        except Exception as e:
            print(e)
    else:
        return "No embedding files detected! Please upload image files for embedding!"
def __val_step_fn(self, step, inputs, metrics_sum):
    with torch.no_grad():
        loss, data = utils.forward_pass(self.model, self.loss_fn, inputs)

    metrics = self.calc_metrics(data)
    utils.sum_to_dict(metrics_sum, metrics)
def get_frame():
    embedding_dict = load_embeddings()
    if embedding_dict:
        try:
            cap = cv2.VideoCapture(0)
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 500)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 500)

            while True:
                # get camera frame
                ret, frame = cap.read()  # Read frame
                #print(frame)

                # Resize frame for faster computation
                frame = cv2.resize(src=frame, dsize=(0, 0), fx=0.8, fy=0.8)

                # Convert the image from BGR color (which OpenCV uses) to RGB color
                #frame = frame[:, :, ::-1]

                #if cv2.waitKey(1) & 0xFF == ord('q'):
                #    break

                # if frame.size > 0:
                faces, rects = get_faces_live(img=frame, pnet=pnet, rnet=rnet,
                                              onet=onet, image_size=image_size)

                if faces:
                    for i in range(len(faces)):
                        face_img = faces[i]
                        rect = rects[i]

                        # Coordinates are kept as-is: the resized frame itself
                        # is what gets streamed, so no scaling back is needed
                        rect = [coordinate for coordinate in rect]

                        face_embedding = forward_pass(
                            img=face_img,
                            session=facenet_persistent_session,
                            images_placeholder=images_placeholder,
                            embeddings=embeddings,
                            phase_train_placeholder=phase_train_placeholder,
                            image_size=image_size)

                        # Compare euclidean distance between this embedding and the embeddings in 'embeddings/'
                        identity = identify_face(embedding=face_embedding,
                                                 embedding_dict=embedding_dict)

                        cv2.rectangle(img=frame,
                                      pt1=(rect[0], rect[1]),
                                      pt2=(rect[2], rect[3]),
                                      color=(0, 0, 255),
                                      thickness=2)

                        W = int(rect[2] - rect[0]) // 2
                        cv2.putText(img=frame,
                                    text=identity,
                                    org=(rect[0] + W - (W // 2), rect[1] - 7),
                                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                    fontScale=0.5,
                                    color=(0, 0, 255),
                                    thickness=1,
                                    lineType=cv2.LINE_AA)

                    ret, jpeg = cv2.imencode('.jpg', frame)
                    yield (b'--frame\r\n'
                           b'Content-Type: image/jpeg\r\n\r\n' + jpeg.tobytes() + b'\r\n\r\n')
                else:
                    continue
        except Exception as e:
            print(e)