def eval_extract_faces(path, check_if_known=True, max_num_proc_imgs=None, central_con=None, local_con=None,
                       close_connections=True):
    """Store the images found below ``path`` and at most one face embedding per image.

    For every loaded image the function records the image (``DBManager.store_image``)
    and its path link (``DBManager.store_image_path``), runs
    ``Models.altered_mtcnn.forward_return_results`` on it and, unless that
    returns ``None``, stores a single embedding row built from the result via
    ``DBManager.store_embedding``.

    Args:
        path: Directory handed to ``load_imgs_from_path`` (loaded recursively).
        check_if_known: When true, images whose ``(file name, last modified)``
            pair is already returned by ``DBManager.get_images_attributes``
            are skipped.
        max_num_proc_imgs: Maximum number of images taken from the loader, or
            ``None`` for no limit. Images skipped as already known still count
            towards this limit because they consume an image id.
        central_con: Connection passed to ``store_directory_path``,
            ``store_image_path`` and ``DBManager.connection_wrapper``.
        local_con: Connection passed to ``store_path_id`` and ``store_image``.
        close_connections: Forwarded to ``DBManager.connection_wrapper``.
    """
    path_to_local_db = DBManager.get_local_db_file_path(path)
    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # path not yet known
        path_id = DBManager.store_directory_path(path, con=central_con, close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=path_to_local_db, con=local_con, close_connections=False)
    imgs_names_and_date = set(DBManager.get_images_attributes(path_to_local_db=path_to_local_db))

    # Note: 'MAX' returns None / (None, ) as a default value
    # NOTE(review): the additions below assume DBManager already maps that None to a number - confirm.
    max_img_id = DBManager.get_max_image_id(path_to_local_db=path_to_local_db)
    start_img_id = max_img_id + 1
    initial_max_embedding_id = DBManager.get_max_embedding_id()

    def get_counted_img_loader():
        """Pair each loaded image with its new image id, honouring the image limit."""
        img_loader = load_imgs_from_path(path, recursive=True, output_file_names=True, output_file_paths=True)
        if max_num_proc_imgs is not None:
            # Hand out exactly max_num_proc_imgs ids starting at start_img_id. Using
            # max_num_proc_imgs + 1 as the end would treat the limit as an absolute id and
            # shrink (or empty) the range whenever the local DB already contains images.
            img_ids = range(start_img_id, start_img_id + max_num_proc_imgs)
            return zip(img_ids, img_loader)
        return enumerate(img_loader, start=start_img_id)

    def store_embedding_row_dicts(con):
        """Store every new image and, if a face was found, its embedding row."""
        print('----- get_embedding_row_dicts -----')
        # TODO: Also auto-increment emb_id etc.
        embedding_id = initial_max_embedding_id + 1
        for img_id, (img_path, img_name, img) in get_counted_img_loader():
            print_progress(img_id, 'image')

            # Modification time rounded to whole seconds, used as part of the "already known" key.
            last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_path).st_mtime))
            if check_if_known and (img_name, last_modified) in imgs_names_and_date:
                continue

            # The image is recorded before detection, so images without a face are stored too.
            # NOTE(review): these calls use local_con / central_con rather than the wrapper's
            # ``con`` - confirm this is intended when central_con is None.
            DBManager.store_image(img_id=img_id, rel_file_path=img_name, last_modified=last_modified,
                                  path_to_local_db=path_to_local_db, con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

            face = Models.altered_mtcnn.forward_return_results(img)
            if face is None:
                log_error(f"no faces found in image '{img_path}'")
                continue

            embedding_row_dict = {
                Columns.cluster_id.col_name: 'NULL',
                Columns.embedding.col_name: face_to_embedding(face),
                Columns.thumbnail.col_name: face,
                Columns.image_id.col_name: img_id,
                Columns.embedding_id.col_name: embedding_id,
            }
            DBManager.store_embedding(embedding_row_dict, con=con, close_connections=False)
            embedding_id += 1

    DBManager.connection_wrapper(store_embedding_row_dicts, con=central_con, close_connections=close_connections)
def extract_faces(path, check_if_known=True, central_con=None, local_con=None, close_connections=True):
    """Store all images found below ``path`` together with an embedding row per detected face.

    Each loaded image is recorded via ``DBManager.store_image`` and
    ``DBManager.store_image_path``; the faces returned by
    ``Models.altered_mtcnn.forward_return_results`` are then turned into
    embedding rows and written in one ``DBManager.store_embeddings`` call per
    image.

    Args:
        path: Directory handed to ``load_imgs_from_path`` (loaded recursively).
        check_if_known: When true, images whose ``(relative path, last modified)``
            pair is already returned by ``DBManager.get_images_attributes``
            are skipped.
        central_con: Connection passed to ``store_directory_path``,
            ``store_image_path`` and ``DBManager.connection_wrapper``.
        local_con: Connection passed to ``store_path_id`` and ``store_image``.
        close_connections: Forwarded to ``DBManager.connection_wrapper``.
    """
    # TODO: Refactor (extract functions)? + rename
    # TODO: Generate Thumbnails differently? (E.g. via Image.thumbnail or sth. like that)
    # TODO: Store + update max_img_id and max_embedding_id somewhere rather than (always) get them via DB query?
    local_db_path = DBManager.get_local_db_file_path(path)

    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # First time this directory is seen: register it in both places.
        path_id = DBManager.store_directory_path(path, con=central_con, close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=local_db_path, con=local_con, close_connections=False)

    known_images = set(DBManager.get_images_attributes(path_to_local_db=local_db_path))

    # Note: 'MAX' returns None / (None, ) as a default value
    first_img_id = DBManager.get_max_image_id(path_to_local_db=local_db_path) + 1
    embedding_id_base = DBManager.get_max_embedding_id()

    def store_new_images_and_faces(con):
        """Walk the image loader and persist every image that is not skipped."""
        # TODO: Also auto-increment emb_id etc.
        last_embedding_id = embedding_id_base
        numbered_images = enumerate(
            load_imgs_from_path(path, recursive=True, output_file_names=True, output_file_paths=True),
            start=first_img_id,
        )
        for img_id, (abs_path, rel_path, img) in numbered_images:
            # TODO: Implement automatic deletion cascade! (Using among other things on_conflict clause and FKs)
            #       ---> Done?

            # An image counts as known when its (name, last modified) pair is already stored
            # for this directory; known images are not processed again.
            mtime_seconds = round(os.stat(abs_path).st_mtime)
            modified_at = datetime.datetime.fromtimestamp(mtime_seconds)
            if check_if_known and (rel_path, modified_at) in known_images:
                continue

            DBManager.store_image(img_id=img_id, rel_file_path=rel_path, last_modified=modified_at,
                                  path_to_local_db=local_db_path, con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

            faces = Models.altered_mtcnn.forward_return_results(img)
            if not faces:
                log_error(f"no faces found in image '{abs_path}'")
                continue

            # TODO: Better way to create these row_dicts?
            rows = []
            for new_embedding_id, face in enumerate(faces, start=last_embedding_id + 1):
                rows.append({
                    Columns.cluster_id.col_name: 'NULL',
                    Columns.embedding.col_name: face_to_embedding(face),
                    Columns.thumbnail.col_name: face,
                    Columns.image_id.col_name: img_id,
                    Columns.embedding_id.col_name: new_embedding_id,
                })
            DBManager.store_embeddings(rows, con=con, close_connections=False)
            last_embedding_id = last_embedding_id + len(faces)

    DBManager.connection_wrapper(store_new_images_and_faces, con=central_con, close_connections=close_connections)
def extract_faces_measure(path, n, check_if_known=True, central_con=None, local_con=None, close_connections=True):
    """Store at most ``n`` images from ``path`` plus an embedding row per detected face.

    Works like a bounded face extraction: image ids are drawn from a range of
    ``n`` consecutive ids, so iteration stops after ``n`` images have been
    taken from the loader (images skipped as already known still use up an id).

    Args:
        path: Directory handed to ``load_imgs_from_path`` (loaded recursively).
        n: Number of image ids to hand out, i.e. the maximum number of images
            taken from the loader.
        check_if_known: When true, images whose ``(file name, last modified)``
            pair is already returned by ``DBManager.get_images_attributes``
            are skipped.
        central_con: Connection passed to ``store_directory_path``,
            ``store_image_path`` and ``DBManager.connection_wrapper``.
        local_con: Connection passed to ``store_path_id`` and ``store_image``.
        close_connections: Forwarded to ``DBManager.connection_wrapper``.
    """
    db_file = DBManager.get_local_db_file_path(path)

    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # Directory has not been registered yet.
        path_id = DBManager.store_directory_path(path, con=central_con, close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=db_file, con=local_con, close_connections=False)

    seen = set(DBManager.get_images_attributes(path_to_local_db=db_file))

    # Note: 'MAX' returns None / (None, ) as a default value
    highest_img_id = DBManager.get_max_image_id(path_to_local_db=db_file)
    first_img_id = highest_img_id + 1
    highest_embedding_id = DBManager.get_max_embedding_id()

    def modification_time(file_path):
        """Return the file's mtime, rounded to whole seconds, as a datetime."""
        return datetime.datetime.fromtimestamp(round(os.stat(file_path).st_mtime))

    def store_bounded_images_and_faces(con):
        """Persist up to ``n`` images and the embedding rows of their faces."""
        embedding_counter = highest_embedding_id
        images = load_imgs_from_path(path, recursive=True, output_file_names=True, output_file_paths=True)
        # The id range comes first in zip() so no extra image is pulled once the ids run out.
        id_window = range(first_img_id, first_img_id + n)

        for img_id, (file_path, file_name, img) in zip(id_window, images):
            # Known = this (name, last modified) pair is already stored for the directory;
            # such images are not processed again.
            modified_at = modification_time(file_path)
            if check_if_known and (file_name, modified_at) in seen:
                continue

            DBManager.store_image(img_id=img_id, rel_file_path=file_name, last_modified=modified_at,
                                  path_to_local_db=db_file, con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

            faces = Models.altered_mtcnn.forward_return_results(img)
            if not faces:
                log_error(f"no faces found in image '{file_path}'")
                continue

            numbered_faces = enumerate(faces, start=embedding_counter + 1)
            rows = [
                {
                    Columns.cluster_id.col_name: 'NULL',
                    Columns.embedding.col_name: face_to_embedding(face),
                    Columns.thumbnail.col_name: face,
                    Columns.image_id.col_name: img_id,
                    Columns.embedding_id.col_name: emb_id,
                }
                for emb_id, face in numbered_faces
            ]
            DBManager.store_embeddings(rows, con=con, close_connections=False)
            embedding_counter = embedding_counter + len(faces)

    DBManager.connection_wrapper(store_bounded_images_and_faces, con=central_con,
                                 close_connections=close_connections)