# Example #1
# 0
    def get_predicted_labels(self, labelled_folder_path,
                             unlabelled_folder_path):
        """Label each unlabelled image as "dorsal" or "palmar".

        Runs personalized PageRank over an image-similarity graph built
        from SVD-reduced HOG features of both folders, once per class
        seed set, and picks the class with the higher rank per image.

        Returns a (predictions, accuracy) pair; predictions is a list of
        (image_name, label) tuples, accuracy is a number or a message
        string when ground-truth labels are unavailable.
        """
        # Resolve the DB tables backing the two folders.
        hog_table_labelled = convert_folder_path_to_table_name(
            labelled_folder_path, "histogram_of_gradients")
        hog_table_unlabelled = convert_folder_path_to_table_name(
            unlabelled_folder_path, "histogram_of_gradients")
        metadata_table = convert_folder_path_to_table_name(
            labelled_folder_path, "metadata")

        connection = DatabaseConnection()
        labelled_dict = connection.get_object_feature_matrix_from_db(
            hog_table_labelled)
        unlabelled_dict = connection.get_object_feature_matrix_from_db(
            hog_table_unlabelled)

        # Stack labelled then unlabelled rows into one matrix; names are
        # kept in the same order so row i corresponds to all_names[i].
        unlabelled_names = unlabelled_dict["images"]
        all_names = labelled_dict["images"] + unlabelled_names
        combined = np.concatenate(
            (labelled_dict["data_matrix"], unlabelled_dict["data_matrix"]),
            axis=0)

        # Reduce dimensionality before building the similarity graph.
        reduced = SingularValueDecomposition().get_transformed_data(
            combined, k=20)

        known_labels = connection.get_correct_labels_for_given_images(
            tablename=metadata_table, label_type="aspectOfHand")
        seed_sets = {
            "dorsal": get_filtered_images_by_label(known_labels, "dorsal"),
            "palmar": get_filtered_images_by_label(known_labels, "palmar"),
        }

        ranker = PageRank()
        similarity = ranker.get_image_similarity_matrix_for_top_k_images(
            6, reduced)

        # One personalized-PageRank run per class; scores[label] maps
        # image name -> rank score.
        scores = {}
        for class_name in ("dorsal", "palmar"):
            seed_vector = ranker.get_seed_vector(seed_sets[class_name],
                                                 all_names)
            pie = ranker.get_page_rank_eigen_vector(similarity, seed_vector)
            scores[class_name] = self.get_ranked_images(pie, all_names)

        images_with_labels = [
            (name, "dorsal")
            if scores["dorsal"][name] > scores["palmar"][name]
            else (name, "palmar")
            for name in unlabelled_names
        ]

        correct_labels = connection.get_correct_labels_for_given_images(
            image_names=unlabelled_names, label_type="aspectOfHand")
        if correct_labels:
            acc = calculate_classification_accuracy(
                convert_tuple_to_dict(images_with_labels),
                convert_tuple_to_dict(correct_labels))

            print("********************************************")
            print("Accuracy = ", acc)
        else:
            acc = "Images not presented in 11 K dataset"

        return images_with_labels, acc
# Example #2
# 0
class Task1_Classifier:
    """Labels hand images as dorsal/palmar by comparing the norms of
    their projections onto per-label PCA latent spaces learned from a
    labelled input folder."""

    def __init__(self):
        self.db_conn = DatabaseConnection()

    def calculate_latent_semantic_for_label(self, no_of_components, label,
                                            tablename, metadata):
        """Fit PCA on the feature rows of images carrying `label`.

        Returns (image_names, Vt) where the rows of Vt span the label's
        latent space.
        """
        image_dict = self.db_conn.get_object_feature_matrix_from_db(
            tablename, label, "aspectofhand", metadata)
        dr_obj = PrincipleComponentAnalysis()
        U, S, Vt = dr_obj.get_latent_semantics(image_dict["data_matrix"],
                                               no_of_components)
        return image_dict["images"], Vt

    def classify_images_folder(self, image_names, data_matrix,
                               dorsal_semantics, palmar_semantics):
        """Assign DORSAL/PALMAR to each row of `data_matrix`.

        A row is labelled dorsal when the norm of its projection onto
        the dorsal latent space exceeds the norm of its palmar
        projection.  Returns a list of (image_name, label) tuples.
        """
        dorsal_distance = np.linalg.norm(
            np.matmul(data_matrix, np.transpose(dorsal_semantics)),
            axis=1, keepdims=True)
        palmar_distance = np.linalg.norm(
            np.matmul(data_matrix, np.transpose(palmar_semantics)),
            axis=1, keepdims=True)

        # keepdims=True keeps each flag wrapped in a 1-element list.
        label_flags = (dorsal_distance > palmar_distance).tolist()
        return [(image_name, DORSAL if flag[0] else PALMAR)
                for image_name, flag in zip(image_names, label_flags)]

    def get_label_for_folder(self,
                             relative_input_folder_path,
                             relative_output_folder_path,
                             no_of_components=20):
        """Classify every image in the output folder using latent spaces
        learned from the input folder; prints accuracy when ground-truth
        labels exist in the DB.

        Returns the sorted list of (image_name, predicted_label) tuples.
        """
        input_tablename = convert_folder_path_to_table_name(
            relative_input_folder_path, "histogram_of_gradients")
        output_tablename = convert_folder_path_to_table_name(
            relative_output_folder_path, "histogram_of_gradients")
        metadata_tablename = convert_folder_path_to_table_name(
            relative_input_folder_path)

        # One latent space per class, both fit on the input folder.
        _, dorsal_semantics = self.calculate_latent_semantic_for_label(
            no_of_components, DORSAL, input_tablename, metadata_tablename)
        _, palmar_semantics = self.calculate_latent_semantic_for_label(
            no_of_components, PALMAR, input_tablename, metadata_tablename)

        query_image_names = get_image_names_in_a_folder(
            relative_output_folder_path)

        # NOTE(review): row order of the DB matrix is assumed to match
        # the folder listing order of query_image_names — confirm.
        query_image_dict = self.db_conn.get_object_feature_matrix_from_db(
            output_tablename)
        query_data_matrix = query_image_dict['data_matrix']

        predicted_labels = self.classify_images_folder(query_image_names,
                                                       query_data_matrix,
                                                       dorsal_semantics,
                                                       palmar_semantics)

        query_list_of_labels = self.db_conn.get_correct_labels_for_given_images(
            query_image_names, "aspectofhand", "metadata")

        prediction = sorted(predicted_labels, key=lambda k: k[0])
        if query_list_of_labels:
            # Both lists sorted by image name so zip pairs them up.
            query_list_of_labels = sorted(query_list_of_labels,
                                          key=lambda k: k[0])
            accuracy = 0.0
            for (image_name, predicted_label), (image2, correct_label) in zip(
                    prediction, query_list_of_labels):
                # Stored label looks like "dorsal right"; keep the aspect.
                correct_label = correct_label.split(' ')[0]
                if image_name == image2 and correct_label == predicted_label:
                    accuracy += 1

            accuracy = 100 * accuracy / float(len(query_image_names))
            print("The accuracy is " + str(accuracy))
        else:
            print("Images not presented in 11 K dataset")

        print(prediction)
        return prediction
class Image_Clustering:
    """Clusters dorsal and palmar hand images with k-means in an SVD
    latent space, then labels query images by their nearest cluster
    centre."""

    def __init__(self):
        self.db_conn = DatabaseConnection()
        # Number of SVD latent dimensions used for clustering.
        self.no_of_dimensions = 10

    def intialize_cluster_centres(self, points, no_of_clusters):
        """Pick `no_of_clusters` initial centres, farthest-first style.

        Starts from the first point, then repeatedly adds the point with
        the largest average distance to the centres chosen so far.

        BUGFIX: the original loop ran ``range(2, no_of_clusters)`` and
        therefore returned only ``no_of_clusters - 1`` centres; it also
        shadowed the outer loop variable ``centre``.
        """
        list_of_centre = [points[0]]
        while len(list_of_centre) < no_of_clusters:
            max_avg_dist = 0
            point_as_centre = None
            for point in points:
                if point in list_of_centre:
                    continue
                avg_dist = 0.0
                for existing_centre in list_of_centre:
                    avg_dist += np.linalg.norm(
                        np.subtract(np.array(existing_centre),
                                    np.array(point)))
                avg_dist = avg_dist / len(list_of_centre)
                if avg_dist > max_avg_dist:
                    max_avg_dist = avg_dist
                    point_as_centre = point
            if point_as_centre is None:
                # Fewer distinct points than requested clusters.
                break
            list_of_centre.append(point_as_centre)
        return list_of_centre

    def k_means(self, points, no_of_centres):
        """Lloyd's k-means on `points` (2-D numpy array).

        Returns {cluster_id: centroid} with ids 0..no_of_centres-1
        (clusters that end up empty drop out, as before).
        """
        points = points.tolist()
        list_of_centre = self.intialize_cluster_centres(points, no_of_centres)

        # BUGFIX: the initial centres ARE points.  The old code did
        # points[int(list_of_centre[index][0])], i.e. re-indexed the
        # point list by the first *coordinate* of each centre (leftover
        # from a commented-out random-index initialization).
        clusters_centroid = {index: centre
                             for index, centre in enumerate(list_of_centre)}
        old_clusters_centroid = {}

        # Iterate until the centroids stop moving.
        while clusters_centroid != old_clusters_centroid:
            # Assign every point to its nearest current centroid.
            clusters_points = {}
            for point in points:
                min_distance = np.inf
                point_in_cluster = -1
                for cluster_id, centroid in clusters_centroid.items():
                    distance = np.linalg.norm(
                        np.subtract(np.array(point), np.array(centroid)))
                    if distance < min_distance:
                        point_in_cluster = cluster_id
                        min_distance = distance
                clusters_points.setdefault(point_in_cluster, []).append(point)

            # Recompute each centroid as the mean of its members.
            old_clusters_centroid = clusters_centroid.copy()
            clusters_centroid = {
                cluster_id: np.mean(np.array(members), axis=0).tolist()
                for cluster_id, members in clusters_points.items()
            }

        return clusters_centroid

    def cluster_images(self, no_of_clusters, relative_input_folder_path, relative_output_folder_path):
        """Cluster the labelled input images (per label) and classify the
        query folder by nearest cluster centre.

        Returns (points_in_cluster, prediction): cluster assignments for
        the input images (dorsal clusters 1..k, palmar k+1..2k) and
        (image, label) predictions for the query images.
        """
        base_tablename = "histogram_of_gradients"

        input_tablename = convert_folder_path_to_table_name(relative_input_folder_path, base_tablename)
        output_tablename = convert_folder_path_to_table_name(relative_output_folder_path, base_tablename)
        input_metadata_tablename = convert_folder_path_to_table_name(relative_input_folder_path)

        image_dict = self.db_conn.get_object_feature_matrix_from_db(input_tablename)
        data_matrix = image_dict['data_matrix']
        image_names = image_dict['images']
        svd_obj = SingularValueDecomposition()
        U, S, Vt = svd_obj.get_latent_semantics(data_matrix, self.no_of_dimensions)

        # Project the input images into the latent space: rows of U*S.
        latent_semantic = np.matmul(U, S).tolist()

        list_of_labels = self.db_conn.get_correct_labels_for_given_images(image_names, "aspectofhand", input_metadata_tablename)
        dorsal_data_matrix = []
        palmar_data_matrix = []

        # Split latent vectors by aspect ("dorsal right" -> "dorsal").
        for image, label in list_of_labels:
            label_update = label.split(' ')[0]
            if label_update == DORSAL:
                dorsal_data_matrix.append(latent_semantic[image_names.index(image)])
            elif label_update == PALMAR:
                palmar_data_matrix.append(latent_semantic[image_names.index(image)])

        palmer_list_of_centers = self.k_means(np.array(palmar_data_matrix), no_of_clusters)
        dorsal_list_of_centers = self.k_means(np.array(dorsal_data_matrix), no_of_clusters)

        # Assign every input image to its globally nearest centre;
        # dorsal centres map to cluster ids 1..k, palmar to k+1..2k.
        points_in_cluster = []
        for image_name, image_vector in zip(image_names, latent_semantic):
            min_distance = np.inf
            row = ()
            for center in dorsal_list_of_centers:
                distance = np.linalg.norm(np.subtract(np.array(image_vector), np.array(dorsal_list_of_centers[center])))
                if distance < min_distance:
                    row = (image_name, int(center) + 1)
                    min_distance = distance
            for center in palmer_list_of_centers:
                distance = np.linalg.norm(np.subtract(np.array(image_vector), np.array(palmer_list_of_centers[center])))
                if distance < min_distance:
                    row = (image_name, no_of_clusters + int(center) + 1)
                    min_distance = distance
            points_in_cluster.append(row)

        # Project query images with the same Vt, then label each by the
        # class of its nearest cluster centre.
        query_image_dict = self.db_conn.get_object_feature_matrix_from_db(output_tablename)
        query_data_matrix = query_image_dict['data_matrix']
        query_image_names = query_image_dict['images']
        query_latent = np.matmul(query_data_matrix, np.transpose(Vt))

        prediction = []
        for image, image_vector in zip(query_image_names, query_latent.tolist()):
            min_distance = np.inf
            labelled = ""
            for cluster_centre in palmer_list_of_centers.values():
                distance = np.linalg.norm(np.subtract(np.array(image_vector), np.array(cluster_centre)))
                if distance < min_distance:
                    labelled = PALMAR
                    min_distance = distance
            for cluster_centre in dorsal_list_of_centers.values():
                distance = np.linalg.norm(np.subtract(np.array(image_vector), np.array(cluster_centre)))
                if distance < min_distance:
                    labelled = DORSAL
                    min_distance = distance
            prediction.append((image, labelled))

        query_list_of_labels = self.db_conn.get_correct_labels_for_given_images(query_image_names, "aspectofhand", "metadata")

        prediction = sorted(prediction, key=lambda k: k[0])
        if query_list_of_labels:
            query_list_of_labels = sorted(query_list_of_labels, key=lambda k: k[0])
            accuracy = 0.0
            for (image_name, predicted_label), (image2, correct_label) in zip(prediction, query_list_of_labels):
                correct_label = correct_label.split(' ')[0]
                if image_name == image2 and correct_label == predicted_label:
                    accuracy += 1

            accuracy = 100 * accuracy / float(len(query_image_names))
            print("The accuracy is " + str(accuracy))
        else:
            print("Images not presented in 11 K dataset")

        points_in_cluster = sorted(points_in_cluster, key=lambda k: k[1])

        return points_in_cluster, prediction
# Example #4
# 0
def get_train_and_test_dataframes_from_db(train_table,
                                          train_table_metadata,
                                          test_table,
                                          num_dims=None,
                                          algo="svd"):
    """Fetch train/test HOG features from the DB and build dataframes.

    Args:
        train_table: feature table for the labelled training images.
        train_table_metadata: metadata table holding the training labels.
        test_table: feature table for the query/test images.
        num_dims: number of latent dimensions; None keeps raw features.
        algo: "svd" or "pca" dimensionality reduction (used only when
            num_dims is given).

    Returns:
        (train_df, test_df, images_not_present) where the flag is True
        when the expected test labels are not available in the DB.

    Raises:
        ValueError: if num_dims is given and algo is unknown (the old
            code silently left the transformed matrices undefined and
            crashed later with UnboundLocalError).
    """
    images_not_present = False
    # dorsal -> -1, palmar -> +1 for SVM-style signed labels.
    label_map = {"dorsal": -1, "palmar": 1}

    # Retrieve raw feature matrices and image name lists.
    db = DatabaseConnection()
    train_dataset = db.get_object_feature_matrix_from_db(train_table)
    test_dataset = db.get_object_feature_matrix_from_db(test_table)
    train_data = train_dataset['data_matrix']
    train_images = train_dataset['images']
    test_data = test_dataset['data_matrix']
    test_images = test_dataset['images']

    # Optional dimensionality reduction fitted on train, applied to test.
    if num_dims is None:  # BUGFIX: was `== None`
        tf_train_data = train_data
        tf_test_data = test_data
    elif algo == "pca":
        reducer = PCA(n_components=num_dims)
        tf_train_data = reducer.fit_transform(train_data)
        tf_test_data = reducer.transform(test_data)
    elif algo == "svd":
        reducer = SingularValueDecomposition(num_dims)
        tf_train_data = reducer.fit_transform(train_data)
        tf_test_data = reducer.transform(test_data)
    else:
        raise ValueError("Unknown reduction algo: " + str(algo))

    # Convert the (image, label) tuples from the DB into dicts.
    train_labels_map = dict(
        db.get_correct_labels_for_given_images(train_images, 'aspectOfHand',
                                               train_table_metadata))
    result_from_db = db.get_correct_labels_for_given_images(
        test_images, 'aspectOfHand')
    exp_test_labels_map = dict(result_from_db) if result_from_db else None

    # train_df: one row per training image with its signed label.
    train_col_names = ['imagename', 'hog_svd_descriptor', 'label']
    train_df = pd.DataFrame(columns=train_col_names)
    for i, image in enumerate(train_images):
        # Stored label looks like "dorsal right"; keep only the aspect.
        label = train_labels_map[image].split(' ')[0]
        train_df.loc[len(train_df)] = [
            image, tf_train_data[i], label_map[label]
        ]

    # test_df: expected label filled in when known, else 'null'.
    test_col_names = [
        'imagename', 'hog_svd_descriptor', 'expected_label', 'predicted_label'
    ]
    test_df = pd.DataFrame(columns=test_col_names)
    if exp_test_labels_map:
        for i, image in enumerate(test_images):
            label = exp_test_labels_map[image].split(' ')[0]
            test_df.loc[len(test_df)] = [
                image, tf_test_data[i], label_map[label], 'null'
            ]
    else:
        # Hoisted out of the loop: the flag is per-lookup, not per-image.
        images_not_present = True
        for i, image in enumerate(test_images):
            test_df.loc[len(test_df)] = [
                image, tf_test_data[i], 'null', 'null'
            ]

    return train_df, test_df, images_not_present