Пример #1
0
def get_xs_and_ys(thres_and_met_to_f_measure, thres=False, metric=False):
    """Turn a ``'<metric>__<threshold>'`` -> F-measure mapping into plot data.

    Each key is split on ``'__'``; the part at index 1 is used when ``thres``
    is truthy, otherwise the part at index 0 when ``metric`` is truthy
    (``thres`` wins if both are set). Leading ``'L'``/``'T'`` characters are
    stripped from that part before it is converted to ``float``.

    :param thres_and_met_to_f_measure: mapping whose keys contain ``'__'`` and
        whose values are convertible to ``float``.
    :param thres: select the key part at index 1 as the x value.
    :param metric: select the key part at index 0 as the x value.
    :return: ``(xs, ys)`` as two lists, or ``None`` (after logging an error)
        when neither ``thres`` nor ``metric`` is truthy.
    """
    if not (thres or metric):
        log_error('thres or metric must be true')
        return None
    part_index = 1 if thres else 0

    # First pass: pick the relevant part of every key.
    raw_pairs = []
    for key, score in thres_and_met_to_f_measure.items():
        raw_pairs.append((key.split('__')[part_index], score))

    # Second pass: convert both components to floats.
    numeric_pairs = []
    for raw_param, score in raw_pairs:
        numeric_pairs.append((float(raw_param.lstrip('LT')), float(score)))
    # TODO: Incorporate variance, too?
    # NOTE: earlier drafts grouped the pairs by parameter and averaged the
    #       F-measures of each group (np.mean) before returning them.

    xs = get_every_nth_item(numeric_pairs, n=0)
    ys = get_every_nth_item(numeric_pairs, n=1)
    return list(xs), list(ys)
Пример #2
0
    def store_embedding_row_dicts(con):
        """Register every loaded image and store one embedding row per image.

        Iterates over ``get_counted_img_loader()``. When ``check_if_known`` is
        truthy, images whose ``(name, last modified)`` pair is contained in
        ``imgs_names_and_date`` are skipped. Every other image is stored via
        ``DBManager.store_image`` / ``DBManager.store_image_path``; if the
        face model returns ``None`` an error is logged, otherwise one
        embedding row is stored through ``con``. Embedding ids are assigned
        consecutively, starting at ``initial_max_embedding_id + 1``.

        :param con: connection handed to ``DBManager.store_embedding``.
        """
        print('----- get_embedding_row_dicts -----')
        # TODO: Also auto-increment emb_id etc.
        next_embedding_id = initial_max_embedding_id + 1
        for img_id, img_record in get_counted_img_loader():
            img_path, img_name, img = img_record
            print_progress(img_id, 'image')

            # Modification time, rounded to whole seconds.
            mtime_seconds = round(os.stat(img_path).st_mtime)
            last_modified = datetime.datetime.fromtimestamp(mtime_seconds)
            already_known = check_if_known and (img_name, last_modified) in imgs_names_and_date
            if already_known:
                continue

            DBManager.store_image(
                img_id=img_id,
                rel_file_path=img_name,
                last_modified=last_modified,
                path_to_local_db=path_to_local_db,
                con=local_con,
                close_connections=False,
            )
            DBManager.store_image_path(
                img_id=img_id,
                path_id=path_id,
                con=central_con,
                close_connections=False,
            )

            face = Models.altered_mtcnn.forward_return_results(img)
            if face is None:
                log_error(f"no faces found in image '{img_path}'")
                continue

            row = {
                Columns.cluster_id.col_name: 'NULL',
                Columns.embedding.col_name: face_to_embedding(face),
                Columns.thumbnail.col_name: face,
                Columns.image_id.col_name: img_id,
                Columns.embedding_id.col_name: next_embedding_id,
            }
            DBManager.store_embedding(row, con=con, close_connections=False)
            next_embedding_id += 1
Пример #3
0
 def get_command(cls, cmd_name):
     """Look up a registered command by name.

     :param cmd_name: key into ``cls.commands``.
     :return: the stored command, or ``None`` (after logging an error) if no
         command is registered under ``cmd_name``.
     """
     try:
         cmd = cls.commands[cmd_name]
     except KeyError:
         # This is a lookup, not a removal, so the message must say so.
         log_error(f"could not find command '{cmd_name}'")
         return None
     return cmd
Пример #4
0
    def store_embedding_row_dicts(con):
        """Register every loaded image and store the embeddings of its faces.

        Iterates over ``get_counted_img_loader()``. When ``check_if_known`` is
        truthy, images whose ``(relative path, last modified)`` pair is in
        ``imgs_rel_paths_and_dates`` are skipped. Every other image is stored
        via ``DBManager.store_image`` / ``DBManager.store_image_path``; if the
        face model result is falsy an error is logged, otherwise one row per
        face is stored through ``con``. Embedding ids continue from
        ``initial_max_embedding_id``.

        :param con: connection handed to ``DBManager.store_embeddings``.
        """
        # TODO: Also auto-increment emb_id etc.
        max_embedding_id = initial_max_embedding_id
        for img_id, img_record in get_counted_img_loader():
            img_abs_path, img_rel_path, img = img_record
            # TODO: Implement automatic deletion cascade! (Using among other things on_conflict clause and FKs)
            #       ---> Done?
            # Check if image already stored --> don't process again
            # known = (name, last modified) as a pair known for this directory
            mtime_seconds = round(os.stat(img_abs_path).st_mtime)
            last_modified = datetime.datetime.fromtimestamp(mtime_seconds)
            already_known = (check_if_known
                             and (img_rel_path, last_modified) in imgs_rel_paths_and_dates)
            if already_known:
                continue

            DBManager.store_image(img_id=img_id, rel_file_path=img_rel_path,
                                  last_modified=last_modified,
                                  path_to_local_db=path_to_local_db,
                                  con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id,
                                       con=central_con, close_connections=False)

            faces = Models.altered_mtcnn.forward_return_results(img)
            if not faces:
                log_error(f"no faces found in image '{img_abs_path}'")
                continue

            # TODO: Better way to create these row_dicts?
            embeddings_row_dicts = []
            for offset, face in enumerate(faces, start=1):
                embeddings_row_dicts.append({
                    Columns.cluster_id.col_name: 'NULL',
                    Columns.embedding.col_name: face_to_embedding(face),
                    Columns.thumbnail.col_name: face,
                    Columns.image_id.col_name: img_id,
                    # ids continue right after the highest id used so far
                    Columns.embedding_id.col_name: max_embedding_id + offset,
                })
            DBManager.store_embeddings(embeddings_row_dicts, con=con,
                                       close_connections=False)
            max_embedding_id += len(faces)
Пример #5
0
def edit_labels(cluster_dict, **kwargs):
    """Interactively relabel whole clusters or single faces.

    The user repeatedly chooses a cluster, then a face (embedding id) inside
    it and a new label, and decides whether the label applies to the whole
    cluster or only to the chosen picture. A failed relabel
    (``IncompleteDatabaseOperation``) is logged and the dialogue continues.

    :param cluster_dict: clusters to choose from; if falsy, an error is
        logged and nothing else happens.
    :param kwargs: accepted but not used by this function.
    :return: ``None``
    """
    # TODO: Refactor
    # TODO: Include option to delete people (and remember that in case same dir is read again? --> Probs optional)

    if not cluster_dict:
        log_error('no clusters found, no labels to edit')
        return

    get_cluster_decision = partial(get_user_decision, 'Would you like to choose another cluster?')
    get_face_decision = partial(get_user_decision, 'Would you like to relabel another face in this cluster?')
    # TODO: Nicer parameter passing?
    get_label_scope_decision = partial(get_user_decision,
                                       'Should the whole cluster receive that label or just the picture?',
                                       choices_strs=('[c]luster', '[p]icture'), valid_choices=('c', 'p'))

    continue_choosing_cluster = ''
    while continue_choosing_cluster != 'n':
        cluster = user_choose_cluster(cluster_dict)
        if cluster is None:
            continue_choosing_cluster = get_cluster_decision()
            continue
        continue_choosing_face = ''
        while continue_choosing_face != 'n':
            try:
                embedding_id = user_choose_embedding_id(cluster)
            except IncompleteDatabaseOperation:
                # Choosing failed on the database side: ask again.
                continue
            if embedding_id is None:
                # User *doesn't* want to relabel another face in this cluster!
                break
            new_label = user_choose_face_label(cluster.label)
            if new_label is None:
                continue_choosing_face = get_face_decision()
                continue

            label_scope = get_label_scope_decision()
            try:
                if label_scope == 'c':
                    set_cluster_label(cluster, new_label)
                else:
                    set_picture_label(embedding_id, new_label, cluster, cluster_dict)
            except IncompleteDatabaseOperation as e:
                # Tell the user the relabel failed instead of dropping it silently.
                log_error(e)

            # Auto-stop choosing faces if cluster is empty or consists of only one face
            # NOTE(review): '> 2' also stops for clusters of exactly two faces,
            #               which the comment above does not mention -- confirm
            #               whether '> 1' was intended.
            continue_choosing_face = get_face_decision() if cluster.get_size() > 2 else 'n'
        continue_choosing_cluster = get_cluster_decision()
Пример #6
0
def user_choose_images_path():
    """Prompt until the user enters a path that exists, then return it.

    After every non-existing path an error is logged and the prompt is shown
    again.

    :return: the first entered path for which ``os.path.exists`` is true.
    """
    prompt = 'Please enter a path with images of people you would like to add.\n'
    images_path = input(prompt)
    while True:
        if os.path.exists(images_path):
            break
        log_error(f"unable to find path '{images_path}'")
        print("\nPlease try again.")
        images_path = input(prompt)

    # TODO: Implement check_if_known question(?), i.e. ask whether already
    #       processed images should be processed again. That can be useful if
    #       some files have changed in a way the program doesn't recognize, or
    #       if faces from these images were deleted and should become
    #       available again. (check_if_known would be True for the answer "n".)
    return images_path
Пример #7
0
    def remove_embedding_by_id(self, embedding_id):
        """Remove one embedding and update the count and center point.

        The center point is treated as the uniformly weighted mean of the
        stored embeddings, so removing ``embedding`` gives
        ``(old_n * old_center - embedding) / (old_n - 1)``.

        :param embedding_id: key of the embedding in ``self.embeddings_dict``.
            If it is unknown, an error is logged and nothing changes.
        :return: ``None``
        """
        try:
            embedding = self.embeddings_dict.pop(embedding_id)
        except KeyError:
            log_error(f'embedding with id {embedding_id} not found.')
            return

        old_num_embeddings = self.num_embeddings
        self.num_embeddings -= 1

        # Check for the empty case explicitly instead of catching
        # ZeroDivisionError: array-like embeddings (e.g. NumPy arrays) do not
        # raise on division by zero, they produce NaN/inf values.
        if self.num_embeddings == 0:
            self.center_point = None
            return

        # (old_center is a uniformly weighted sum of the old embeddings)
        self.center_point = (old_num_embeddings * self.center_point -
                             embedding) / self.num_embeddings
Пример #8
0
        def store_embedding_row_dicts(con):
            """Register every loaded image and store the embeddings of its faces.

            Images whose ``(name, last modified)`` pair is contained in
            ``imgs_names_and_date`` are skipped when ``check_if_known`` is
            truthy. All other images are stored in the local and central
            databases; for each face the model returns, one embedding row is
            written through ``con``. Embedding ids continue from
            ``initial_max_embedding_id``.

            :param con: connection handed to ``DBManager.store_embeddings``.
            """

            def build_row(embedding_id, face, img_id):
                # One database row describing a single detected face.
                return {
                    Columns.cluster_id.col_name: 'NULL',
                    Columns.embedding.col_name: face_to_embedding(face),
                    Columns.thumbnail.col_name: face,
                    Columns.image_id.col_name: img_id,
                    Columns.embedding_id.col_name: embedding_id,
                }

            max_embedding_id = initial_max_embedding_id
            for img_id, img_record in get_counted_img_loader():
                img_path, img_name, img = img_record
                # Check if image already stored --> don't process again
                # known = (name, last modified) as a pair known for this directory
                mtime_seconds = round(os.stat(img_path).st_mtime)
                last_modified = datetime.datetime.fromtimestamp(mtime_seconds)
                if check_if_known and (img_name, last_modified) in imgs_names_and_date:
                    continue

                DBManager.store_image(img_id=img_id, rel_file_path=img_name,
                                      last_modified=last_modified,
                                      path_to_local_db=path_to_local_db,
                                      con=local_con, close_connections=False)
                DBManager.store_image_path(img_id=img_id, path_id=path_id,
                                           con=central_con,
                                           close_connections=False)

                faces = Models.altered_mtcnn.forward_return_results(img)
                if not faces:
                    log_error(f"no faces found in image '{img_path}'")
                    continue

                first_new_id = max_embedding_id + 1
                embeddings_row_dicts = [
                    build_row(embedding_id, face, img_id)
                    for embedding_id, face in enumerate(faces, start=first_new_id)
                ]
                DBManager.store_embeddings(embeddings_row_dicts, con=con,
                                           close_connections=False)
                max_embedding_id += len(faces)
Пример #9
0
    def clear_data_measure(cluster_dict):
        """Clear the local and central tables and empty ``cluster_dict``.

        The local database is the one located via ``DATASET_PATH``. All work
        runs inside ``DBManager.connection_wrapper``; if that raises
        ``IncompleteDatabaseOperation`` the error is printed and logged.

        :param cluster_dict: dict that is overwritten with an empty dict.
        """
        path_to_local_db = DBManager.get_local_db_file_path(DATASET_PATH)

        def clear_data_worker(central_con, local_con):
            # Local tables first, then central ones, then the in-memory dict.
            DBManager.clear_local_tables(path_to_local_db, con=local_con,
                                         close_connections=False)
            clear_central_tables(con=central_con, close_connections=False)
            overwrite_dict(cluster_dict, {})

        wrapper_options = {
            'path_to_local_db': path_to_local_db,
            'with_central': True,
            'with_local': True,
        }
        try:
            DBManager.connection_wrapper(clear_data_worker, **wrapper_options)
        except IncompleteDatabaseOperation as err:
            print('clear_data_measure error')
            log_error(err)
Пример #10
0
def user_choose_cluster(cluster_dict):
    """Let the user pick a cluster by id.

    The available cluster ids are printed and the user is asked for an id
    until the input helper returns ``None`` or an id that is contained in
    ``cluster_dict.get_cluster_ids()``.

    :param cluster_dict: object providing ``get_cluster_ids`` and
        ``get_cluster_by_id``.
    :return: the chosen cluster, or ``None`` if the input helper returned
        ``None`` (presumably on empty input, since ``allow_empty=True``).
    """
    # TODO: Refactor
    known_ids = cluster_dict.get_cluster_ids()
    while True:
        print_cluster_ids(cluster_dict)
        candidate_id = get_user_input_of_type(class_=int,
                                              obj_name='cluster id',
                                              allow_empty=True)
        if candidate_id is None:
            return None
        if candidate_id in known_ids:
            return cluster_dict.get_cluster_by_id(candidate_id)
        log_error(
            f'cluster "{candidate_id}" not found; Please try again.')
Пример #11
0
    def reclassify_worker(con):
        """Re-cluster the embeddings and replace the stored clusters.

        Uses ``embeddings_with_ids`` from the enclosing scope when it is not
        ``None``; otherwise all embeddings are loaded from the database. If
        there are no embeddings an error is logged and nothing is changed.
        Otherwise the clusters returned by ``DBManager.get_certain_clusters``
        are passed to the core algorithm as existing clusters, the result is
        written with ``DBManager.overwrite_clusters`` and ``cluster_dict`` is
        overwritten with the new clusters.

        :param con: connection handed to ``DBManager.overwrite_clusters``.
        """
        # all operations in worker, so if any DB operation raises error, it is caught
        embeddings_to_cluster = embeddings_with_ids
        if embeddings_to_cluster is None:
            embeddings_to_cluster = list(DBManager.get_all_embeddings(with_ids=True))

        if not embeddings_to_cluster:
            log_error('no embeddings found, nothing to edit')
            return

        new_cluster_dict = DBManager.get_certain_clusters()
        clustering_result = CoreAlgorithm().cluster_embeddings(
            embeddings=embeddings_to_cluster,
            existing_clusters_dict=new_cluster_dict,
            should_reset_cluster_ids=True,
            final_clusters_only=False,
        )
        # Only the removed clusters of the three-part result are used here.
        _, _, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters(
            new_cluster_dict,
            removed_clusters_dict,
            no_new_embs=True,
            clear_clusters=True,
            con=con,
            close_connections=False,
        )
        overwrite_dict(cluster_dict, new_cluster_dict)
Пример #12
0
def user_choose_local_db_dir_path():
    """Ask the user for a directory that contains a local database.

    The prompt is repeated until the user enters either nothing or an
    existing path for which ``DBManager.is_local_db_in_dir`` is true. Every
    rejected input is reported via ``log_error``.

    :return: the accepted directory path, or ``None`` on empty input.
    """
    # TODO: Refactor, use user_choose function!
    # The same prompt is used for the first attempt and for every retry.
    prompt = 'Please enter a path containing a local table you would like to clear.\n'
    local_db_dir_path = input(prompt)
    while local_db_dir_path:
        if not os.path.exists(local_db_dir_path):
            log_error(f"unable to find path '{local_db_dir_path}'")
        elif not DBManager.is_local_db_in_dir(local_db_dir_path):
            log_error(
                f"unable to find a local database file in path '{local_db_dir_path}'"
            )
        else:
            return local_db_dir_path
        print("\nPlease try again.")
        local_db_dir_path = input(prompt)
    # Empty input: no directory was chosen.
    return None
Пример #13
0
    def process_images_dir_measure(cluster_dict, n):
        """Process the faces under ``DATASET_PATH`` and re-cluster all embeddings.

        If processing the faces raises ``IncompleteDatabaseOperation`` the
        error is printed and logged and the function returns early. If the
        clustering step raises it, ``cluster_dict`` is restored from a copy
        taken before clustering.

        :param cluster_dict: existing clusters; passed to the core algorithm.
        :param n: forwarded to ``process_faces_measure``.
        """
        dataset_dir = DATASET_PATH

        def cluster_processed_faces(con):
            all_embeddings = list(DBManager.get_all_embeddings(with_ids=True))

            # TODO: Call reclassify handler here?
            # TODO: Clear existing clusters? Issues with ids etc.????
            # passing result cluster dict already overwrites it
            clustering_result = CoreAlgorithm().cluster_embeddings(
                all_embeddings,
                existing_clusters_dict=cluster_dict,
                should_reset_cluster_ids=True,
                final_clusters_only=False,
            )
            _, modified, removed = clustering_result
            DBManager.overwrite_clusters_simplified(
                modified, removed, con=con, close_connections=False)

        try:
            print('------ PROCESSING FACES')
            process_faces_measure(dataset_dir, n)
            print('------ DONE PROCESSING')
        except IncompleteDatabaseOperation as err:
            print('process_images_dir_measure error')
            log_error(err)
            return

        # Copy taken before clustering so a failed run can be rolled back.
        backup = cluster_dict.copy()
        try:
            DBManager.connection_wrapper(cluster_processed_faces)
        except IncompleteDatabaseOperation:
            overwrite_dict(cluster_dict, backup)
Пример #14
0
 def get_cluster_by_id(self, cluster_id):
     """Return the cluster stored under ``cluster_id``.

     :return: ``self[cluster_id]``, or ``None`` (after logging an error) if
         that lookup raises ``KeyError``.
     """
     try:
         found_cluster = self[cluster_id]
     except KeyError:
         log_error(f"no cluster with id '{cluster_id}' found")
         found_cluster = None
     return found_cluster
Пример #15
0
 def remove_command(cls, cmd_name):
     """Remove the command registered under ``cmd_name``.

     If ``cls.commands`` has no such key, an error is logged instead.
     """
     # TODO: needed?
     try:
         registry = cls.commands
         registry.pop(cmd_name)
     except KeyError:
         log_error(f"could not remove unknown command '{cmd_name}'")
Пример #16
0
def call_handler(handler, *args, **kwargs):
    """Call ``handler`` with the given arguments and log any exception.

    :return: the handler's return value, or ``None`` if the handler raised an
        ``Exception`` (which is passed to ``log_error``).
    """
    try:
        result = handler(*args, **kwargs)
    except Exception as err:
        log_error(err)
        return None
    return result