def __init__(self, folder, model='hog', max_size=1000, num_jitters=1,
             encoding_model='large', distance_metric='default',
             threshold_equal=0.17, cuda_memory_limit=0,
             trash_face_file=None):
    self.__folder = folder
    if not os.path.exists(self.__folder):
        os.makedirs(self.__folder)

    log.debug(f'trash_face_file: {trash_face_file}')
    if trash_face_file is not None and os.path.exists(trash_face_file):
        trash_folder = os.path.join(self.__folder, TRASH_FOLDERNAME)
        # exist_ok avoids a FileExistsError when re-initialising over an
        # existing patterns folder
        os.makedirs(trash_folder, exist_ok=True)
        shutil.copyfile(trash_face_file,
                        os.path.join(trash_folder, FACE_FILENAME))

    self.__files = {}
    self.__persons = []
    self.__basenames = {}
    self.__model = model
    self.__encoding_model = encoding_model
    self.__max_size = int(max_size)
    self.__num_jitters = int(num_jitters)
    self.__distance_metric = distance_metric
    self.__threshold_equal = float(threshold_equal)
    self.__cuda_memory_limit = cuda_memory_limit
    self.__encoder = None

    self.__pickle_file = os.path.join(folder, 'patterns.pickle')
    if not os.path.exists(self.__pickle_file):
        self.generate(True)
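
# Usage sketch (hedged: the enclosing class name `Patterns` and the
# TRASH_FOLDERNAME/FACE_FILENAME constants are inferred from context, not
# shown in this excerpt):
#
#   patterns = Patterns('/data/face-patterns',
#                       model='cnn',           # GPU-backed detector
#                       threshold_equal=0.17)  # near-duplicate cutoff
#
# A missing patterns.pickle in `folder` triggers a full `generate(True)`
# on construction.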
def clean_tags(self, filename, tags=None, tag_prefix=None, commit=True):
    fid = self.__get_id(filename)
    if fid is None:
        log.warning(f'Filename not found: {filename}')
        return 0

    res = 0
    if tags is not None:
        for tag in tags:
            res += self.__clean_tag(
                fid, self.__get_tag_id(tag, TAG_TYPE_PHOTO), commit)
            res += self.__clean_tag(
                fid, self.__get_tag_id(tag, TAG_TYPE_VIDEO), commit)

    if tag_prefix is not None:
        tag_ids = self.__get_tag_ids(tag_prefix)
        for tag_id in tag_ids:
            res += self.__clean_tag(fid, tag_id, commit)

    if commit:
        self.__conn.commit()
    log.debug(f'Removed {res} tags for {filename}')
    return res
def __remove_file(self, filename):
    del self.__files[self.relpath(filename)]
    try:
        del self.__basenames[os.path.basename(filename)]
    except KeyError:
        pass
    log.debug(f'File removed: {filename}')
def __delete_tag(self, tag_id, cleanup=False, commit=True):
    if self.__readonly:
        return

    c = self.__conn.cursor()
    if cleanup:
        res = c.execute(
            'DELETE FROM taggings WHERE tag_id=?', (tag_id,)).rowcount
        if res != 0:
            log.debug(f'Removed {res} taggings for tag {tag_id}')
    else:
        res = c.execute(
            'SELECT count(*) FROM taggings WHERE tag_id=?', (tag_id,))
        count = res.fetchone()[0]
        if count != 0:
            raise Exception(f'Found {count} taggings for tag {tag_id}')

    res = c.execute('DELETE FROM tags WHERE id=?', (tag_id,)).rowcount
    if commit:
        self.__conn.commit()
    return res
def __init__(self, filename, readonly=False):
    log.debug(f'Connect to {filename} ({readonly})')
    self.__conn = sqlite3.connect(
        'file:' + filename + ('?mode=ro' if readonly else ''), uri=True)
    self.__tag_cache = {}
    self.__readonly = readonly
def __match_faces(self, encoded_faces):
    if len(self.__pattern_encodings) == 0:
        log.warning('Empty patterns')

    for i in range(len(encoded_faces)):
        if self.__step_stage_face():
            return False
        encoding = encoded_faces[i]['encoding']
        dist, name, pattern = self.__match_face_by_nearest(
            encoding, patterns.PATTERN_TYPE_GOOD)
        if dist > 0.001:  # skip exact (zero-distance) matches
            dist_bad, name_bad, pattern_bad = self.__match_face_by_nearest(
                encoding, patterns.PATTERN_TYPE_BAD)
            # match against bad patterns only for near-identical faces
            if dist_bad < self.__threshold_equal and dist_bad < dist:
                name = name_bad + '_bad'
                dist = dist_bad
                pattern = pattern_bad

        log.debug(f'matched: {name}: {dist}: {pattern}')

        if 'name' in encoded_faces[i]:
            encoded_faces[i]['oldname'] = encoded_faces[i]['name']

        # three confidence bands: keep the name as-is below __threshold,
        # mark it '_weak' below __threshold_weak, otherwise drop the match
        if dist < self.__threshold:
            pass
        elif dist < self.__threshold_weak:
            name += '_weak'
        else:
            name = ''
            dist = 1

        encoded_faces[i]['name'] = name
        encoded_faces[i]['dist'] = dist
        encoded_faces[i]['pattern'] = pattern
    return True
def get_files_faces(self, where_clause, args=(), get_count=True):
    c = self.__conn.cursor()

    if get_count:
        start = time.time()
        res = c.execute(
            'SELECT COUNT(DISTINCT filename) '
            'FROM files JOIN faces ON files.id=faces.file_id ' +
            where_clause, args)
        count = res.fetchone()[0]
        elapsed = time.time() - start
        log.debug(
            f'Count of "{where_clause}" fetched in {elapsed} sec: {count}')
        if count == 0:
            return 0, iter(())
    else:
        count = -1

    start = time.time()
    res = c.execute(
        'SELECT filename, faces.id, box, encoding, '
        'landmarks, name, dist, frame, pattern '
        'FROM files JOIN faces ON files.id=faces.file_id ' +
        where_clause, args)
    elapsed = time.time() - start
    log.debug(f'"{where_clause}" fetched in {elapsed} sec')
    return count, self.__yield_files_faces(tools.cursor_iterator(res))
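
# Usage sketch (hedged: `db` stands for an instance of the owning database
# class; the yielded dict shape is inferred from callers elsewhere in this
# file):
#
#   count, files_faces = db.get_files_faces('WHERE name=?', ('alice',))
#   for ff in files_faces:
#       print(ff['filename'], [f['name'] for f in ff['faces']])
#
# `get_count=True` pays for an extra COUNT(DISTINCT ...) query, but lets
# callers size progress reporting before consuming the lazy generator.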
def list_files(path, exts=None, nomedia_names=()):
    log.debug(f'list_files: {path}, {exts}, {nomedia_names}')
    files = []
    for p in glob.glob(path):
        files += __list_files(p, exts, nomedia_names)
    log.debug(f'list_files: found {len(files)} files')
    return files
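
# The recursive helper `__list_files` is referenced above but not shown in
# this excerpt. A plausible, self-contained sketch (an assumption, not the
# repo's code): walk the tree, prune any directory containing a "no media"
# marker file, and keep only files whose lowercase extension is in `exts`.

import os


def _list_files_sketch(path, exts=None, nomedia_names=()):
    res = []
    for root, dirs, names in os.walk(path):
        if any(os.path.exists(os.path.join(root, nm))
               for nm in nomedia_names):
            dirs[:] = []  # prune: do not descend below a marker file
            continue
        for name in names:
            if exts is None or os.path.splitext(name)[1].lower() in exts:
                res.append(os.path.join(root, name))
    return res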
def frames(self):
    if self.__frames is None:
        log.debug(f'LazyVideo load: {self.__video_file}')
        self.__frames = read_video(self.__video_file,
                                   self.__max_size,
                                   self.__max_video_frames,
                                   self.__video_frames_step)
    return self.__frames
def __set_landmarks(image, face_landmarks):
    for pts in face_landmarks.values():
        for pt in pts:
            try:
                r, g, b = image.getpixel(pt)
                image.putpixel(pt, ((128 + r) % 255,
                                    (128 + g) % 255,
                                    (128 + b) % 255))
            except IndexError:
                log.debug(f'Incorrect landmark point: {pt}')
def do_GET(self):
    log.debug('do_GET: ' + self.path)
    try:
        path, params = self.__path_params()
        if path == '/list_cache':
            self.__list_cache(params)
            return
        if path == '/get_names':
            self.__get_names()
            return
        if path == '/get_name_image':
            self.__get_name_image(params)
            return
        if path == '/get_folders':
            self.__get_folders()
            return
        if path == '/get_status':
            self.__get_status()
            return
        if path == '/get_face_src':
            self.__get_face_src(params)
            return
        if path == '/get_face_pattern':
            self.__get_face_pattern(params)
            return

        if path == '/':
            path = 'index.html'

        if '..' in path:
            log.warning('".." in path: ' + path)
            self.__not_found_response()
            return

        ext = tools.get_low_ext(path)
        if ext in ('.html', '.js', '.css', '.png', '.jpg'):
            self.__file_request(path, params)
            return

        log.warning('Wrong path: ' + path)
        self.__not_found_response()
    except Exception as ex:
        self.__server_error_response(str(ex))
        log.exception(ex)
def get_all_encodings(self, encodings_split=1):
    if self.__all_encodings is None:
        log.debug('loading all encodings...')
        files_faces = tools.filter_images(self.get_all()[1])
        encodings = []
        info = []
        for ff in files_faces:
            for face in ff['faces']:
                encodings.append(face['encoding'])
                info.append((ff['filename'], face))
        np_encodings = numpy.array_split(
            numpy.array(encodings), encodings_split)
        self.__all_encodings = (np_encodings, info)
        log.debug(f'{len(info)} encodings were loaded')
    return self.__all_encodings
def __init__(self, filename, readonly=False):
    log.debug(f'Connect to {filename} ({readonly})')

    sqlite3.register_adapter(numpy.ndarray, adapt_array)
    sqlite3.register_converter('array', convert_array)

    self.__conn = sqlite3.connect(
        'file:' + filename + ('?mode=ro' if readonly else ''),
        detect_types=sqlite3.PARSE_DECLTYPES,
        uri=True)
    if not readonly:
        self.__conn.executescript(SCHEMA)
    self.__readonly = readonly
    atexit.register(self.commit)
    self.__all_encodings = None  # all encodings for searching by face
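
# `adapt_array`/`convert_array` are registered above but not shown in this
# excerpt. A common sqlite3+numpy round-trip implementation looks like the
# sketch below (an assumption about this repo, not a quote from it):
# arrays are serialized in numpy's .npy format into a column declared with
# type "array", which PARSE_DECLTYPES routes through the converter.

import io
import sqlite3

import numpy


def adapt_array(arr):
    # numpy.ndarray -> BLOB on INSERT
    out = io.BytesIO()
    numpy.save(out, arr)
    return sqlite3.Binary(out.getvalue())


def convert_array(blob):
    # BLOB -> numpy.ndarray on SELECT
    return numpy.load(io.BytesIO(blob))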
def optimize(self):
    encoder = self.__get_encoder()

    # get encodings and reverse to preserve old patterns
    encs, names, files = self.encodings()
    encs.reverse()
    names.reverse()
    files.reverse()

    # convert to numpy array and cache the length for speed
    encs = np.array(encs)
    encs_len = len(encs)

    to_remove = []
    while 0 < encs_len:
        log.debug(f'to optimize check: {encs_len}')
        name = names.pop()
        fname = files.pop()
        # numpy array "pop()": split off the last row
        enc, encs = encs[-1], encs[:-1]
        encs_len -= 1
        dists = encoder.distance(encs, enc)
        i = 0
        while i < encs_len:
            if dists[i] < self.__threshold_equal:
                if name != names[i]:
                    fn1 = self.fullpath(fname)
                    fn2 = self.fullpath(files[i])
                    log.warning(
                        f'Different persons {dists[i]} "{fn1}" "{fn2}"')
                else:
                    to_remove.append(self.fullpath(files[i]))
                    log.info(f'eq: {fname} {files[i]}')
                names.pop(i)
                files.pop(i)
                encs = np.delete(encs, i, axis=0)
                dists = np.delete(dists, i, axis=0)
                encs_len -= 1
            else:
                # advance only when nothing was removed: after np.delete
                # the next element has already shifted into position i,
                # so an unconditional increment would skip it
                i += 1

    self.remove_files(to_remove)
    log.info(f'{len(to_remove)} files were optimized.')
def __get_files_faces_by_filter(self, fltr):
    log.debug(f'Get by filter: {fltr}')
    tp = fltr['type']
    if tp == 'unmatched':
        return self.__db.get_unmatched()
    elif tp == 'all':
        return self.__db.get_all()
    elif tp == 'weak':
        return self.__db.get_weak(fltr['path'])
    elif tp == 'weak_unmatched':
        return self.__db.get_weak_unmatched(fltr['path'])
    elif tp == 'folder':
        return self.__db.get_folder(fltr['path'])
    elif tp == 'name':
        return self.__db.get_by_name(fltr['path'], fltr['name'])
    else:
        raise Exception(f'Unknown filter type: {tp}')
def set_tags(self, resync=False):
    log.info(f'Set tags started ({resync})')
    self.__create_tags()

    if resync:
        count, files_faces = self.__recdb.get_all()
    else:
        count, files_faces = self.__recdb.get_unsynced()

    images_count = 0
    faces_count = 0
    for ff in tools.reduce_faces_from_videos(
            files_faces, self.__min_video_face_count):
        filename = ff['filename']
        self.__plexdb.clean_tags(filename, tag_prefix=TAG_PREFIX,
                                 commit=False)
        tags = []
        for face in ff['faces']:
            name = face['name']
            if name in self.__names:
                tags.append(TAG_PREFIX + name)
        log.debug(f'sync tags for image: {filename}: {tags}')
        if len(tags) != 0:
            ext = tools.get_low_ext(filename)
            if ext in tools.IMAGE_EXTS:
                self.__plexdb.set_tags(filename, tags,
                                       plexdb.TAG_TYPE_PHOTO,
                                       commit=False)
            elif ext in tools.VIDEO_EXTS:
                self.__plexdb.set_tags(filename, tags,
                                       plexdb.TAG_TYPE_VIDEO,
                                       commit=False)
        self.__recdb.mark_as_synced(filename, commit=False)
        images_count += 1
        faces_count += len(tags)

    self.__plexdb.commit()
    self.__recdb.commit()
    log.info(f'Set tags done: images={images_count} faces={faces_count}')
def enable_landmarks(filename, enable):
    descr, thumbnail = load_face_description(filename)
    enabled = thumbnail is not None

    if enable == enabled:
        log.debug(f'enable_landmarks skip: {filename}')
        return

    if descr is None or 'landmarks' not in descr:
        log.warning(f'has no landmarks: {filename}')
        return

    image = Image.open(filename)
    if enable:
        thumbnail = image.copy()
        __set_landmarks_lines(image, descr['landmarks'])
    else:
        image = thumbnail
        thumbnail = None
    save_with_description(image, descr, thumbnail, filename)
def reduce_faces_from_video(faces, min_count):
    def test_face(face):
        return face['name'] != '' and \
            (face['count'] > min_count or face['dist'] < 0.01)

    dct = collections.defaultdict(
        lambda: {'dist': sys.maxsize, 'count': 0})
    for face in faces:
        name = face['name']
        count = dct[name]['count'] + 1
        if face['dist'] < dct[name]['dist']:
            dct[name] = face
        dct[name]['count'] = count

    res = []
    for face in dct.values():
        ok = True
        if face['name'].endswith('_weak'):
            n = face['name'][:-5]
            if n in dct:
                if test_face(dct[n]):
                    # skip the weak match: the name is already present
                    # in the current file with a strong match
                    ok = False
                    log.debug(f'skip weak: {n}')
                else:
                    # extend the weak count with the already present name
                    face['count'] += dct[n]['count']
                    log.debug(f'extend weak: {n}')
        if ok:
            ok = test_face(face)
        log.debug(f'faces in video: {face["name"]}: {face["count"]}: {ok}')
        if ok:
            res.append(face)
    return res
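
# A small worked example of the aggregation above (hypothetical data; real
# face dicts carry more keys such as 'encoding' and 'box'):
#
#   faces = [{'name': 'alice', 'dist': 0.25},
#            {'name': 'alice', 'dist': 0.30},
#            {'name': 'bob_weak', 'dist': 0.45}]
#   reduce_faces_from_video(faces, min_count=1)
#   # -> [{'name': 'alice', 'dist': 0.25, 'count': 2}]
#
# 'alice' is seen in two frames (> min_count) and keeps her best (smallest)
# distance; the single 'bob_weak' hit fails test_face and is discarded.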
def do_POST(self):
    log.debug('do_POST: ' + self.path)
    try:
        path, params = self.__path_params()
        if path == '/add_to_pattern':
            self.__add_to_pattern_request(params, self.__data())
            return
        if path == '/recognize_folder':
            self.__recognize_folder_request(params)
            return
        if path == '/generate_faces':
            self.__generate_faces_request(params)
            return
        if path == '/match':
            self.__match_request(params)
            return
        if path == '/clusterize':
            self.__clusterize_request(params)
            return
        if path == '/get_faces_by_face':
            self.__get_faces_by_face_request(params, self.__form_data())
            return
        if path == '/stop':
            self.__stop_request(params)
            return
        if path == '/clean_cache':
            self.__clean_cache()
            return
    except Exception as ex:
        self.__server_error_response(str(ex))
        log.exception(ex)
def __save_debug_images(
        self, encoded_faces, media, debug_out_folder,
        debug_out_file_name, is_video=False, skip_face_gen=False):

    if is_video:
        encoded_faces = tools.reduce_faces_from_video(
            encoded_faces, self.__min_video_face_count)

    for enc in encoded_faces:
        name = enc['name']
        if name == '':
            name = 'unknown_00000'
        out_folder = os.path.join(debug_out_folder, name)

        top, right, bottom, left = enc['box']
        prefix = '{}_{:03d}'.format(name, int(enc['dist'] * 100))
        out_filename = os.path.join(
            out_folder,
            f'{prefix}_{debug_out_file_name}_{left}x{top}.jpg')

        if self.__cdb is not None:
            if not self.__cdb.check_face(enc['face_id']):
                out_stream = io.BytesIO()
                tools.save_face(out_stream,
                                media.get(enc['frame']),
                                enc,
                                self.__debug_out_image_size,
                                media.filename())
                self.__cdb.save_face(enc['face_id'],
                                     out_stream.getvalue())
                log.debug(f"face {enc['face_id']} cached")
            if not skip_face_gen:
                self.__cdb.add_to_cache(enc['face_id'], out_filename)
        elif not skip_face_gen:
            self.__make_debug_out_folder(out_folder)
            tools.save_face(out_filename,
                            media.get(enc['frame']),
                            enc,
                            self.__debug_out_image_size,
                            media.filename())
            log.debug(f'face saved to: {out_filename}')
def __file_request(self, path, params):
    if path[0] == '/':
        path = path[1:]
    path = urllib.parse.unquote(path)

    if path.startswith('cache/'):
        fname = os.path.join(self.server.face_cache_path(),
                             path[len('cache/'):])
        if self.server.cdb() is not None:
            data = self.server.cdb().get_from_cache(fname)
            if data is not None:
                self.__send_blob(data, 'image/jpeg', params)
            else:
                self.__not_found_response()
                log.debug(f'File in cache not found: {fname}')
            return
    elif path.startswith('pattern/'):
        fname = self.server.patterns().fullpath(path[len('pattern/'):])
    else:
        fname = os.path.join(self.server.web_path(), path)
    self.__send_file(fname, params)
def encode_faces(self, image):
    boxes = face_recognition.face_locations(image, model=self.__model)
    if not boxes:
        return []

    filtered_boxes = []
    for box in boxes:
        (top, right, bottom, left) = box
        face_image = image[top:bottom, left:right]
        (height, width) = face_image.shape[:2]
        if height < self.__min_size or width < self.__min_size:
            log.debug(f'Skip too small face: {height}x{width}')
            continue
        # blur detection: the variance of the Laplacian drops for
        # out-of-focus crops
        gray = cv2.cvtColor(face_image, cv2.COLOR_BGR2GRAY)
        fm = cv2.Laplacian(gray, cv2.CV_64F).var()
        if fm < 50:
            log.debug(f'Skip too blurry face: {fm}')
            continue
        filtered_boxes.append(box)

    if filtered_boxes:
        if self.__step_stage_face(len(filtered_boxes)):
            return []
        encodings, landmarks, profile_angles = self.__encoder.encode(
            image, filtered_boxes)
        res = [{'encoding': e,
                'box': b,
                'frame': 0,
                'landmarks': l,
                'profile_angle': pa}
               for e, l, b, pa in zip(encodings, landmarks,
                                      filtered_boxes, profile_angles)]
        res = self.__filter_encoded_faces(res)
    else:
        res = []
    return res
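
# The `fm < 50` cutoff above is the classic "variance of Laplacian" focus
# measure. A self-contained sketch of the same check (the threshold value
# is this code's choice and is commonly tuned per dataset):

import cv2


def is_blurry(image_bgr, threshold=50.0):
    """Return True when the image is likely out of focus."""
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    # edges drive the Laplacian response; blur suppresses edges, so the
    # variance of the filtered image drops
    return cv2.Laplacian(gray, cv2.CV_64F).var() < threshold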
def __filter_encoded_faces(self, encoded_faces):
    res = []
    for enc in encoded_faces:
        if 'profile_angle' in enc and \
                enc['profile_angle'] is not None and \
                enc['profile_angle'] > self.__max_face_profile_angle:
            log.debug(f"Skip profile face: {enc['profile_angle']}")
            continue
        if not tools.test_landmarks(enc['landmarks']):
            log.debug('Skip incorrect landmarks')
            continue
        res.append(enc)
        # use .get(): 'profile_angle' may be absent for some encoders
        log.debug(f"profile face: {enc.get('profile_angle')}")
    return res
def get_faces_by_face(self, filename, debug_out_folder, remove_file=False):
    if self.__init_stage('get_faces_by_face', locals()):
        return

    image = tools.LazyImage(filename, self.__max_size)
    encoded_faces = self.encode_faces(image.get())
    face = encoded_faces[0]
    log.debug(f'found face: {face}')

    all_encodings = self.__db.get_all_encodings(self.__max_workers)
    res = list(self.__executor.map(self.__encoder.distance,
                                   all_encodings[0],
                                   itertools.repeat(face['encoding'])))
    distances = numpy.concatenate(res)

    filtered = []
    for dist, info in zip(distances, all_encodings[1]):
        if dist < 0.4:  # hardcoded search cutoff
            filtered.append((dist, info))
    filtered.sort()
    log.debug(f'{len(filtered)} faces matched')

    self.__start_stage(len(filtered))
    for dist, info in filtered:
        if self.__step_stage():
            break
        fname, face = info
        face['dist'] = dist
        media = tools.load_media(fname,
                                 self.__max_size,
                                 self.__max_video_frames,
                                 self.__video_frames_step)
        debug_out_file_name = self.__extract_filename(fname)
        self.__save_debug_images(
            (face,), media, debug_out_folder, debug_out_file_name)
        self.__step_stage_face()

    if remove_file:
        log.debug(f'removing temp file: {filename}')
        os.remove(filename)
def cuda_init(tf_memory_limit=0):
    if not has_cuda():
        log.debug('cuda disabled')
        return
    try:
        import tensorflow as tf
        log.debug('cuda init')
        gpu = tf.config.experimental.list_physical_devices('GPU')[0]
        if tf.config.experimental.get_memory_growth(gpu):
            return
        tf.config.experimental.set_memory_growth(gpu, True)
        if tf_memory_limit:
            log.debug(f'set cuda memory limit: {tf_memory_limit}')
            tf.config.experimental.set_virtual_device_configuration(
                gpu,
                [tf.config.experimental.VirtualDeviceConfiguration(
                    memory_limit=tf_memory_limit)])
    except ModuleNotFoundError:
        log.info('tensorflow disabled, skip initialisation')
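
# Usage sketch (hedged: `has_cuda()` is a helper from this module, not
# shown here). TensorFlow's VirtualDeviceConfiguration memory_limit is
# specified in megabytes:
#
#   cuda_init()                      # just enable memory growth
#   cuda_init(tf_memory_limit=2048)  # additionally cap the GPU at ~2 GB
#
# TensorFlow only accepts device configuration before the GPU is first
# used, so this must run early in the process.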
def test_landmarks(l):
    if l is None:
        log.debug('Empty landmarks')
        return False

    if 'chin' not in l:
        log.debug('landmarks without chin')
        return True

    size = bound_size(l['chin'])

    if not test_line_angle(l['chin']):
        log.debug('landmarks chin angle test failed')
        return False

    if bound_size(l['left_eye']) >= size / 4 or \
            bound_size(l['right_eye']) >= size / 4:
        log.debug('landmarks eye size test failed')
        return False

    if bound_size(l['left_eyebrow']) >= size / 2 or \
            bound_size(l['right_eyebrow']) >= size / 2:
        log.debug('landmarks eyebrow size test failed')
        return False

    if bound_size(l['nose_tip']) >= size / 4 or \
            bound_size(l['nose_bridge']) >= size / 2:
        log.debug('landmarks nose size test failed')
        return False

    return True
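
# `bound_size` and `test_line_angle` are used above but not shown in this
# excerpt. A plausible sketch of `bound_size` (an assumption, not the
# repo's code): the larger side of the axis-aligned bounding box of a
# landmark's points, which the checks above use as a feature-size proxy
# relative to the chin.

def bound_size_sketch(points):
    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    return max(max(xs) - min(xs), max(ys) - min(ys))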
def get(self, dummy_frame_num=0):
    if self.__image is None:
        log.debug(f'LazyImage load: {self.__image_file}')
        self.__image = read_image(self.__image_file, self.__max_size)
    return self.__image
def cuda_release():
    import torch
    log.debug('cuda release')
    torch.cuda.empty_cache()