示例#1
0
def get_svd_image_data_from_folder(relative_folder_path, k=10):
    """
    Fetch the HOG descriptors of the images in a folder and transform them with SVD.

    :param relative_folder_path: folder path given with a leading '/', e.g. '/Labelled/Set2'
    :param k: second argument forwarded to SingularValueDecomposition.get_transformed_data
              (defaults to 10)
    :return:
    tuple of (data matrix after the SVD transformation, the 'images' entry returned by the
    HOG descriptor lookup for relative_folder_path)
    """
    folder_images = get_image_names_in_a_folder(relative_folder_path)
    hog_lookup = DatabaseConnection().HOG_descriptor_from_image_ids(folder_images)
    hog_matrix = hog_lookup['data_matrix']
    reduced_matrix = SingularValueDecomposition().get_transformed_data(hog_matrix, k)
    return reduced_matrix, hog_lookup['images']
    # NOTE(review): as indented, this def sits inside the enclosing function body after its
    # return statement, so it is never reached. Its body repeats
    # RelevanceFeedback.get_PPR_based_feedback, which is defined later in this file.
    def get_PPR_based_feedback(self, q, rel_items, irl_items, obj_feature_matrix, m):
        """
        Re-rank the images listed in 'test_dataset.pickle' with a page-rank computation.

        :param q: query vector passed to compute_new_query_vector as q_old
        :param rel_items: items marked relevant; used for the new query vector and the seed vector
        :param irl_items: items marked irrelevant; used for the new query vector and the seed vector
        :param obj_feature_matrix: not used in this body
        :param m: passed to PageRank.get_top_K_images_based_on_scores
        :return: the list produced by PageRank.get_top_K_images_based_on_scores
        """
        # The updated query vector is computed here but not read again in this body.
        q_new = self.compute_new_query_vector(q_old=q, relevant_items=rel_items, irrel_items=irl_items)
        topology_images = read_from_pickle('test_dataset.pickle')
        image_names = get_image_names_from_tuples(topology_images)
        db_conn = DatabaseConnection()
        data_image_dict = db_conn.HOG_descriptor_from_image_ids(image_names)
        data_matrix = data_image_dict['data_matrix']
        # From here on image_names is the list returned by the lookup, not the pickle-derived one.
        image_names = data_image_dict['images']
        svd_obj = SingularValueDecomposition()
        svd_image_data = svd_obj.get_transformed_data(data_matrix, 8)  # change this for 11K images

        pg_obj = PageRank()
        image_similarity_matrix = pg_obj.get_image_similarity_matrix_for_top_k_images(6, svd_image_data)
        seed_vector = pg_obj.get_seed_vector(rel_items, image_names, irl_items)
        pie = pg_obj.get_page_rank_eigen_vector(image_similarity_matrix, seed_vector)
        new_rank_list = pg_obj.get_top_K_images_based_on_scores(pie, image_names, m)

        return new_rank_list
class RelevanceFeedback:
    """
    Re-rank image search results using relevance feedback collected from the user.

    Four strategies are offered: SVM-based, decision-tree-based, page-rank-based and
    probabilistic feedback. Feature vectors are read through ``self.database_connection``
    using the 'histogram_of_gradients' feature.
    """

    def __init__(self):
        self.database_connection = DatabaseConnection()
        self.conn = self.database_connection.get_db_connection()
        print('Initiating RelevanceFeedback....')

    def _mean_feature_vector(self, items, shape):
        """Return the mean HOG vector of ``items``; zeros of ``shape`` when ``items`` is empty."""
        total = np.zeros(shape)
        for item in items:
            total = total + self.database_connection.get_feature_data_for_image('histogram_of_gradients', item)
        if len(items) != 0:
            total = total / len(items)
        return total

    def compute_new_query_vector(self, q_old, relevant_items, irrel_items, alpha=0.3, beta=0.65, gamma=0.05):
        """
        Combine the old query with the mean relevant / irrelevant feature vectors.

        :param q_old: current query vector (its ``shape`` sizes the accumulators)
        :param relevant_items: image ids marked relevant
        :param irrel_items: image ids marked irrelevant
        :param alpha: weight of the old query vector
        :param beta: weight of the mean relevant vector
        :param gamma: weight subtracted for the mean irrelevant vector
        :return: ``alpha * q_old + beta * mean(relevant) - gamma * mean(irrelevant)``
        """
        print('Computing new query vector.....')

        avg_rel_vec = self._mean_feature_vector(relevant_items, q_old.shape)
        avg_irl_vec = self._mean_feature_vector(irrel_items, q_old.shape)

        return alpha * q_old + beta * avg_rel_vec - gamma * avg_irl_vec

    def get_user_feedback(self, init_rank_list, q_name, caller='misc'):
        """
        Ask on stdin, for every ranked image except the query itself, whether it is relevant.

        :param init_rank_list: ranked entries whose first element is the image name; when
                               ``caller == 'prb'`` the entries are taken from ``init_rank_list[0]``
        :param q_name: name of the query image, which is skipped
        :param caller: 'prb' for the probabilistic caller, anything else otherwise
        :return: tuple ``(rel_items, irl_items)`` of image names; only the answer 'y' counts
                 as relevant
        """
        print('Taking user feedback now...')
        rel_items = []
        irl_items = []

        ranked_entries = init_rank_list[0] if caller == 'prb' else init_rank_list
        for entry in ranked_entries:
            name = entry[0]
            if name == q_name:
                continue
            print(f'Is image {name} relevant ? (y/n)')
            # Compare by value: identity ('is') against a string literal is not guaranteed to hold.
            if input() == 'y':
                rel_items.append(name)
            else:
                irl_items.append(name)

        return rel_items, irl_items

    @staticmethod
    def _select_predicted_images(image_names, predictions, m):
        """
        Return every image predicted relevant (label 1), padded with irrelevant ones if short.

        When fewer than ``m`` images are predicted relevant, the first images predicted
        irrelevant (label -1) are appended, in their original order, until ``m`` names are
        collected or no irrelevant images remain. The result is not truncated when more than
        ``m`` images are predicted relevant.
        """
        selected = [name for name, label in zip(image_names, predictions) if label == 1]
        shortfall = m - len(selected)
        if shortfall > 0:
            irrelevant = [name for name, label in zip(image_names, predictions) if label == -1]
            selected.extend(irrelevant[:shortfall])
        return selected

    def _get_classifier_based_feedback(self, make_classifier, q, rel_items, irl_items, m):
        """Shared pipeline for the SVM and decision-tree feedback; ``make_classifier`` builds the model."""
        q_new = self.compute_new_query_vector(q_old=q, relevant_items=rel_items, irrel_items=irl_items)
        X_train, Y_train = self.create_X_Y_as_np_matrix(rel_items=rel_items, irl_items=irl_items)

        # Training the classifier on the user-labelled images
        classifier = make_classifier()
        classifier.fit(X=X_train, y=Y_train)

        # Now getting more test data from LSH indexes
        test_dataset = read_from_pickle('test_dataset.pickle')
        X_test, image_names = self.create_X_test_as_np_matrix(test_dataset=test_dataset)
        Y_pred = classifier.predict(u=X_test)

        candidate_names = self._select_predicted_images(image_names, Y_pred, m)
        new_obj_feature_matrix = self.database_connection.HOG_descriptor_from_image_ids(
            image_ids=candidate_names)

        return get_most_m_similar_images(data_with_images=new_obj_feature_matrix,
                                         query_image_feature_vector=q_new, m=m)

    def get_SVM_based_feedback(self, q, rel_items, irl_items, obj_feature_matrix, m):
        """
        Re-rank using an SVM trained on the user's relevant / irrelevant images.

        :param q: current query vector
        :param rel_items: image ids marked relevant (label 1)
        :param irl_items: image ids marked irrelevant (label -1)
        :param obj_feature_matrix: unused; kept for a uniform signature across strategies
        :param m: number of images requested
        :return: result of ``get_most_m_similar_images`` for the updated query vector
        """
        return self._get_classifier_based_feedback(
            support_vector_machine.SupportVectorMachine, q, rel_items, irl_items, m)

    def get_DTC_based_feedback(self, q, rel_items, irl_items, obj_feature_matrix, m):
        """
        Re-rank using a decision tree trained on the user's relevant / irrelevant images.

        :param q: current query vector
        :param rel_items: image ids marked relevant (label 1)
        :param irl_items: image ids marked irrelevant (label -1)
        :param obj_feature_matrix: unused; kept for a uniform signature across strategies
        :param m: number of images requested
        :return: result of ``get_most_m_similar_images`` for the updated query vector
        """
        return self._get_classifier_based_feedback(
            decision_tree_learning.DecisionTreeLearning, q, rel_items, irl_items, m)

    def get_PPR_based_feedback(self, q, rel_items, irl_items, obj_feature_matrix, m):
        """
        Re-rank the images listed in 'test_dataset.pickle' with a page-rank computation
        seeded by the user's feedback.

        :param q: current query vector
        :param rel_items: image ids marked relevant
        :param irl_items: image ids marked irrelevant
        :param obj_feature_matrix: unused; kept for a uniform signature across strategies
        :param m: passed to ``PageRank.get_top_K_images_based_on_scores``
        :return: the list produced by ``PageRank.get_top_K_images_based_on_scores``
        """
        # NOTE(review): the updated query vector is not consumed by the page-rank ranking below;
        # the call is kept so its output and database reads stay as before.
        self.compute_new_query_vector(q_old=q, relevant_items=rel_items, irrel_items=irl_items)

        topology_images = read_from_pickle('test_dataset.pickle')
        requested_names = get_image_names_from_tuples(topology_images)
        db_conn = DatabaseConnection()
        data_image_dict = db_conn.HOG_descriptor_from_image_ids(requested_names)
        data_matrix = data_image_dict['data_matrix']
        image_names = data_image_dict['images']
        svd_obj = SingularValueDecomposition()
        svd_image_data = svd_obj.get_transformed_data(data_matrix, 8)  # change this for 11K images

        pg_obj = PageRank()
        image_similarity_matrix = pg_obj.get_image_similarity_matrix_for_top_k_images(6, svd_image_data)
        seed_vector = pg_obj.get_seed_vector(rel_items, image_names, irl_items)
        pie = pg_obj.get_page_rank_eigen_vector(image_similarity_matrix, seed_vector)
        return pg_obj.get_top_K_images_based_on_scores(pie, image_names, m)

    def get_init_ranking(self, obj_feature_matrix,
                         q):  # For SVM, DTC, PPR.... check calculate_init_prob_similarity for Probab based
        """
        Produce the initial top-5 ranking for query ``q``.

        :param obj_feature_matrix: mapping with a 'data_matrix' entry, also passed on to
                                   ``get_most_m_similar_images``
        :param q: query feature vector
        :return: tuple ``(init_rank_list, Vt)``
        """
        Vt = self.get_Vt(obj_feature_matrix)
        init_rank_list = get_most_m_similar_images(data_with_images=obj_feature_matrix, query_image_feature_vector=q,
                                                   Vt=Vt, m=5)
        return init_rank_list, Vt

    # Typical follow-up once the initial ranking is available:
    # rel_items,irl_items=rf.get_user_feedback(init_rank_list=init_rank_list,q_name=q_name)
    # q_new=rf.compute_new_query_vector(q_old=q,relevant_items=rel_items,irrel_items=irl_items)
    # new_rank_list=get_most_m_similar_images(data_with_images=obj_feature_matrix,query_image_feature_vector=q_new,Vt=Vt,m=5)

    def get_Vt(self, obj_feature_matrix):  # For SVM, DTC, PPR.... check calculate_init_prob_similarity for Probab based
        """Return ``Vt`` from a 25-component ``get_latent_semantics`` call on ``obj_feature_matrix['data_matrix']``."""
        svd = singular_value_decomposition.SingularValueDecomposition()
        data_matrix = obj_feature_matrix['data_matrix']
        _, _, Vt = svd.get_latent_semantics(data_matrix=data_matrix, n_components=25)
        return Vt

    def get_probabilistic_relevance_feedback(self, D_matrix, images, q_name, m):
        """
        Run one round of probabilistic relevance feedback.

        :param D_matrix: 2-D array with one row per image and one column per feature
        :param images: image names, aligned with the rows of ``D_matrix``
        :param q_name: name of the query image (skipped when asking for feedback)
        :param m: number of results shown to the user and returned
        :return: the top ``m`` ``(image, score)`` pairs after feedback
        """
        n_i = self.calculate_n_i(D_matrix=D_matrix)
        init_scores = self.calculate_initial_prob_similarity(D_matrix=D_matrix, images=images, n_i=n_i)
        rel_items, irl_items = self.get_user_feedback(init_rank_list=[init_scores[:m]], q_name=q_name, caller='prb')
        new_rank_list = self.calculate_feedback_prob_similarity(D_matrix=D_matrix, images=images,
                                                                relevant_items=rel_items, n_i=n_i)
        return new_rank_list[:m]

    @staticmethod
    def _rank_by_weighted_sum(D_matrix, images, weights):
        """Score each row as ``sum(row[i] * weights[i])`` and return ``(image, score)`` pairs, best first."""
        scores = {}
        for j, row in enumerate(D_matrix):
            score = 0
            for i, weight in enumerate(weights):
                score = score + row[i] * weight
            scores[images[j]] = score
        return sorted(scores.items(), key=lambda pair: pair[1], reverse=True)

    def calculate_feedback_prob_similarity(self, D_matrix, images, relevant_items, n_i):
        """
        Score every image using the feedback-adjusted per-feature weights.

        :param D_matrix: 2-D array, one row per image
        :param images: image names, aligned with the rows of ``D_matrix``
        :param relevant_items: names of the images the user marked relevant
        :param n_i: ``(1, features)`` array as returned by ``calculate_n_i``
        :return: list of ``(image, score)`` pairs sorted by descending score
        """
        N = D_matrix.shape[0]
        R = len(relevant_items)
        doc_counts = n_i[0]
        rel_counts = self.calculate_r_i(D_matrix=D_matrix, images=images, relevant_items=relevant_items)[0]

        # Nothing to score; also avoids evaluating the weights for an empty collection.
        if N == 0:
            return []

        # The per-feature weight does not depend on the row, so compute it once.
        weights = []
        for n, r in zip(doc_counts, rel_counts):
            numerator = (r + 0.5) / (R + 1 - r)
            denominator = (n - r + 0.5) / (N - R + 1 - n + r)
            weights.append(math.log2(numerator / denominator))

        return self._rank_by_weighted_sum(D_matrix, images, weights)

    def calculate_initial_prob_similarity(self, D_matrix, images, n_i):
        """
        Score every image before any feedback is available.

        :param D_matrix: 2-D array, one row per image
        :param images: image names, aligned with the rows of ``D_matrix``
        :param n_i: ``(1, features)`` array as returned by ``calculate_n_i``
        :return: list of ``(image, score)`` pairs sorted by descending score
        """
        N = D_matrix.shape[0]

        # The per-feature weight does not depend on the row, so compute it once.
        weights = [math.log2((N - n + 0.5) / (n + 0.5)) for n in n_i[0]]

        return self._rank_by_weighted_sum(D_matrix, images, weights)

    def calculate_r_i(self, D_matrix, images, relevant_items):
        """
        Count, per feature, the relevant images whose value for that feature is positive.

        :return: float array of shape ``(1, D_matrix.shape[1])``
        """
        relevant = set(relevant_items)
        row_is_relevant = np.array([images[i] in relevant for i in range(D_matrix.shape[0])], dtype=bool)
        counts = np.count_nonzero(D_matrix[row_is_relevant] > 0, axis=0)
        return np.zeros((1, D_matrix.shape[1])) + counts

    def calculate_n_i(self, D_matrix):
        """
        Count, per feature, the images whose value for that feature is positive.

        :return: float array of shape ``(1, D_matrix.shape[1])``
        """
        counts = np.count_nonzero(D_matrix > 0, axis=0)
        return np.zeros((1, D_matrix.shape[1])) + counts

    def _flat_hog_vector(self, image_id):
        """Fetch the HOG feature of ``image_id`` and flatten it to 1-D of length ``fv.shape[1]``."""
        fv = self.database_connection.get_feature_data_for_image('histogram_of_gradients', image_id)
        return fv.reshape(fv.shape[1])

    def create_X_Y_as_np_matrix(self, rel_items, irl_items):
        """
        Build the training set from the user's feedback.

        :return: tuple ``(X, Y)`` of numpy arrays; relevant items come first with label 1,
                 followed by irrelevant items with label -1
        """
        X = [self._flat_hog_vector(item) for item in rel_items]
        relevant_count = len(X)
        X.extend(self._flat_hog_vector(item) for item in irl_items)
        Y = [1] * relevant_count + [-1] * (len(X) - relevant_count)

        return np.array(X), np.array(Y)

    def create_X_test_as_np_matrix(self, test_dataset):
        """
        Build the test matrix for the entries of ``test_dataset``.

        :param test_dataset: iterable of entries whose first element is the image name
        :return: tuple ``(X, imageNames)`` with the feature matrix and the matching names
        """
        imageNames = [item[0] for item in test_dataset]
        X = [self._flat_hog_vector(name) for name in imageNames]

        return np.array(X), imageNames