Python pre_processing示例，Preprocessing.pre_processing Python示例

示例#1

0

显示文件

def interrogate_index_base(img,
                           gamma=0.2,
                           alpha=0.10,
                           tau=2.65,
                           ltp_threshold=0.2,
                           nbr_result=1):
    """Rank the stored index-base images by their distance to a query image.

    The query image is passed through ``pre_processing`` and ``ltp``; the
    resulting descriptor is compared, using the Euclidean distance, with the
    descriptor of every entry read from ``index_base.npy``.

    Args:
        img: query image, handed unchanged to ``pre_processing``.
        gamma: forwarded to ``pre_processing``.
        alpha: forwarded to ``pre_processing``.
        tau: forwarded to ``pre_processing``.
        ltp_threshold: forwarded to ``ltp``.
        nbr_result: number of closest entries to return.

    Returns:
        A list of at most ``nbr_result`` ``[name, distance]`` pairs ordered
        from the smallest distance to the largest.
    """
    # allow_pickle=True lets NumPy load object arrays; only use it on an
    # index file you trust
    stored_entries = np.load('index_base.npy', allow_pickle=True)

    # descriptor of the pre-processed query image
    query_descriptor = ltp(pre_processing(img, gamma, alpha, tau),
                           ltp_threshold)

    # one [name, distance] pair per stored entry (column 0 is the name,
    # column 1 the stored descriptor)
    matches = [[
        stored_entries[row, 0],
        distance.euclidean(query_descriptor, stored_entries[row, 1])
    ] for row in range(len(stored_entries))]

    # closest entries first
    matches.sort(key=lambda pair: pair[1])

    return matches[:nbr_result]

示例#2

0

显示文件

def create_index_base(path,
                      gamma=0.2,
                      alpha=0.10,
                      tau=2.65,
                      ltp_threshold=0.2):
    """Compute a descriptor for every image in a directory and save the index.

    Each file listed in ``path`` is read as a gray-scale image scaled to
    [0, 1], passed through ``pre_processing`` and ``ltp_indexation``, and
    stored next to its file name. The result is written to
    ``index_base.npy`` in the current working directory as an ``(n, 2)``
    object array: column 0 is the file name, column 1 the descriptor.

    Args:
        path: directory containing the images to index.
        gamma: forwarded to ``pre_processing``.
        alpha: forwarded to ``pre_processing``.
        tau: forwarded to ``pre_processing``.
        ltp_threshold: forwarded to ``ltp_indexation``.

    Returns:
        None. The index is saved to disk and progress is printed on stdout.
    """
    # listing all the entries in the directory
    images_list = os.listdir(path)
    nbr_images = len(images_list)

    # (image name, image descriptor) pairs collected so far
    index_base = []

    for position, image_name in enumerate(images_list, start=1):
        # reading the image as gray scale
        img_full_path = os.path.join(path, image_name)
        raw_img = cv.imread(img_full_path, cv.IMREAD_GRAYSCALE)

        # cv.imread returns None for a file it cannot decode (sub-directory,
        # non-image file); skip it instead of failing on None.astype
        if raw_img is not None:
            # normalizing the pixel values between 0 and 1
            img = raw_img.astype(np.float32) / 255.0

            preprocessed_img = pre_processing(img, gamma, alpha, tau)
            img_descriptor = ltp_indexation(preprocessed_img, ltp_threshold)
            index_base.append((image_name, img_descriptor))

        # progress bar: integer arithmetic on the number of processed files,
        # so it never divides by zero (fewer than 20 files) and never grows
        # past 20 characters
        sys.stdout.write('\r')
        sys.stdout.write("[%-20s] %d%%" % ('=' * (position * 20 // nbr_images),
                                           position * 100 // nbr_images))
        sys.stdout.flush()

    # filling an explicit object array: np.array() on rows that mix a string
    # and an ndarray is rejected as inhomogeneous by recent NumPy versions
    index_base_np = np.empty((len(index_base), 2), dtype=object)
    for row, (image_name, img_descriptor) in enumerate(index_base):
        index_base_np[row, 0] = image_name
        index_base_np[row, 1] = img_descriptor

    # calling the garbage collector
    gc.collect()
    np.save('index_base.npy', index_base_np)

示例#3

0

显示文件

文件： MovieReview.py 项目： yangtaoxf/OpinionMining

    def clean_paragraph(text):
        """Return the tokens of every sentence of ``text`` as one flat list.

        Each item produced by ``sentences(text)`` is passed through
        ``pre_processing`` and the result's ``split()`` tokens are appended,
        in order, to the returned list.
        """
        paragraph = []
        for s in sentences(text):
            # extend in place: ``paragraph = paragraph + ...`` copied the
            # whole list again for every sentence
            paragraph.extend(pre_processing(s).split())

        return paragraph

示例#4

0

显示文件

文件： MovieReview.py 项目： yangtaoxf/OpinionMining

def parse_sentences(rdd):
    """Split every record of ``rdd`` into pre-processed, tokenized sentences.

    Records are paired with their position by ``zipWithIndex`` and then
    mapped through ``swap_kv``; each resulting pair is read as
    ``(_id, text)`` (presumably ``swap_kv`` puts the index first -- confirm
    against its definition).

    Args:
        rdd: an object exposing ``zipWithIndex``, ``map`` and ``flatMap``.

    Returns:
        The flat-mapped collection holding one ``(_id, tokens)`` pair per
        sentence, where ``tokens`` is ``pre_processing(sentence).split()``.
    """
    raw = rdd.zipWithIndex().map(swap_kv)

    # single-argument lambda: tuple parameters such as ``lambda (_id, text)``
    # are a SyntaxError on Python 3, indexing the pair works on 2 and 3
    data = raw.flatMap(lambda id_text: [(id_text[0], pre_processing(s).split())
                                        for s in sentences(id_text[1])])
    return data

示例#5

0

显示文件

def interrogate_index_base(img,
                           gamma=0.2,
                           alpha=0.10,
                           tau=2.65,
                           ltp_threshold=0.2,
                           ltp_tau=6,
                           nbr_result=1):
    """Rank the stored index-base images by their distance to a query image.

    The query image is passed through ``pre_processing`` and ``ltp_query``,
    which yields an upper and a lower code image. For every entry of
    ``index_base.npy`` the stored descriptor is split into 59 upper bins and
    the remaining lower bins (one bin per uniform pattern value plus
    ``'others'``). Each interior query pixel selects the bin matching its
    code, and the value found at the pixel position in that bin, capped at
    ``ltp_tau``, is added to the entry's distance.
    NOTE(review): each bin is indexed as ``bin[w, h]``, so it is presumably a
    per-pixel distance map produced at indexing time -- confirm.

    Args:
        img: query image, handed unchanged to ``pre_processing``.
        gamma: forwarded to ``pre_processing``.
        alpha: forwarded to ``pre_processing``.
        tau: forwarded to ``pre_processing``.
        ltp_threshold: forwarded to ``ltp_query``.
        ltp_tau: upper bound applied to every per-pixel contribution.
        nbr_result: number of closest entries to return.

    Returns:
        A list of at most ``nbr_result`` ``[name, distance]`` pairs ordered
        from the smallest distance to the largest.
    """
    # loading the saved index base numpy array
    index_base = np.load('index_base.npy', allow_pickle=True)
    nbr_entries = len(index_base)

    # list that contains the distances between the query image and the images in the index base
    image_distance = []

    # applying the pre-processing to the query image
    preprocessed_img = pre_processing(img, gamma, alpha, tau)

    # getting the upper and lower images for query image
    upper_descriptor, lower_descriptor = ltp_query(preprocessed_img,
                                                   ltp_threshold)

    # uniform histogram possible values, taken from "https://en.wikipedia.org/wiki/Local_binary_patterns#Concept",
    uni_hist_keys = [
        0, 1, 2, 3, 4, 6, 7, 8, 12, 14, 15, 16, 24, 28, 30, 31, 32, 48, 56, 60,
        62, 63, 64, 96, 112, 120, 124, 126, 127, 128, 129, 131, 135, 143, 159,
        191, 192, 193, 195, 199, 207, 223, 224, 225, 227, 231, 239, 240, 241,
        243, 247, 248, 249, 251, 252, 253, 254, 255, 'others'
    ]
    nbr_bins = len(uni_hist_keys)

    # numeric keys only: constant-time membership tests, and pixel values
    # are never compared with the 'others' string
    uniform_values = set(uni_hist_keys[:-1])

    # the bin selected by each interior query pixel does not depend on the
    # stored image, so it is resolved once instead of once per index entry
    pixel_bins = []
    for w in range(1, (upper_descriptor.shape[0] - 1)):
        for h in range(1, (upper_descriptor.shape[1] - 1)):
            pixel_value_upper = upper_descriptor[w, h]
            pixel_value_lower = lower_descriptor[w, h]
            if pixel_value_upper not in uniform_values:
                pixel_value_upper = 'others'
            if pixel_value_lower not in uniform_values:
                pixel_value_lower = 'others'
            pixel_bins.append((w, h, pixel_value_upper, pixel_value_lower))

    # calculating the distance between the query image descriptor and the images descriptors in the index base
    for i in range(nbr_entries):
        # dividing the stored descriptor into its upper and lower bins
        base_img_descriptor = index_base[i, 1]
        upper_uni_descriptor = dict(
            zip(uni_hist_keys, base_img_descriptor[0:nbr_bins]))
        lower_uni_descriptor = dict(
            zip(uni_hist_keys, base_img_descriptor[nbr_bins:]))

        # the sum of distances between the query image and the current index base image
        total_distance = 0
        for w, h, upper_bin, lower_bin in pixel_bins:
            total_distance += min(upper_uni_descriptor[upper_bin][w, h],
                                  ltp_tau)
            total_distance += min(lower_uni_descriptor[lower_bin][w, h],
                                  ltp_tau)

        # appending the name of the image and the distance to the image distance list
        image_distance.append([index_base[i, 0], total_distance])

        # progress bar: integer arithmetic on the number of processed
        # entries, so it never divides by zero (fewer than 20 entries) and
        # never grows past 20 characters
        done = i + 1
        sys.stdout.write('\r')
        sys.stdout.write("[%-20s] %d%%" % ('=' * (done * 20 // nbr_entries),
                                           done * 100 // nbr_entries))
        sys.stdout.flush()

    # sorting the distances by the smallest distance
    ordered_img_distances = sorted(image_distance, key=lambda x: x[1])

    # returning the n images with the shortest distance to the index images
    return ordered_img_distances[0:nbr_result]