Example #1
def extract_all_words(image, filter='CAPS'):
    """extract all of the words form the image.

    extracts words from the inverse and normal threshold of the input image so
    and joins these lists together. this is because pytesseract likes black text
    on a white background.

    :param image: input image
    :type image: cv2 image
    :return: extracted words
    :rtype: list
    """
    # standard and inverse threshold so that at least one of the images
    # is black text on a white background, which tesseract prefers
    variations = []
    _, thresh = cv2.threshold(convert.bgr_to_gray(image), 127, 255,
                              cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    variations.append(thresh)
    _, thresh_inv = cv2.threshold(convert.bgr_to_gray(image), 127, 255,
                                  cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    variations.append(thresh_inv)

    # get the text from each version of the image
    if filter == 'CAPS':
        text = []
        for v in variations:
            text.append(get_caps(v))
        return text
    elif filter == 'NUMS':
        text = []
        for v in variations:
            text.append(get_nums(v))
        return text
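The helpers get_caps and get_nums are not shown in this listing; below is a minimal sketch of what they might look like, assuming pytesseract is the OCR backend and each helper restricts Tesseract's character whitelist. These names and configs are a reconstruction, not the repo's actual code.

import pytesseract

def get_caps(image):
    # restrict tesseract to capital letters (whitelists are honoured by the
    # legacy engine; some LSTM builds of tesseract 4.x ignore them)
    config = '-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    return pytesseract.image_to_string(image, config=config).split()

def get_nums(image):
    # restrict tesseract to digits only
    config = '-c tessedit_char_whitelist=0123456789'
    return pytesseract.image_to_string(image, config=config).split()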
Example #2
def match(test, exemplar_descs):
    """using ORB decriptors compute match between test and exemplar_descs.
    
    :param test: image to test
    :type test: cv2 image
    :param exemplar: exemplar descriptions
    :type exemplar: list of orb descriptors
    :return: distance sorted matches
    :rtype: list
    """
    # create an ORB object
    orb = cv2.ORB_create()

    # convert to GRAYSCALE
    test_g = convert.bgr_to_gray(test)

    # Find the keypoints and descriptors with ORB
    _, descs = orb.detectAndCompute(test_g, None)

    # create a brute force matcher object
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    # init to hold the best prediction
    prediction = ('(none)', np.inf)

    # match with each exemplar
    for d in exemplar_descs:
        # match the descriptors
        matches = bf.match(d[1], descs)

        # sort in order of distance, lowest first
        sorted_matches = sorted(matches, key=lambda x: x.distance)

        # extract just the distances
        distances = [m.distance for m in sorted_matches]

        # calculate a score
        score = sum(distances[:4])

        # a score of 0 means no matches were found; treat as the worst case
        if score == 0:
            score = np.inf
        # update prediction because this match is closer
        if score < prediction[1]:
            prediction = (d[0], score)

    return prediction[0]
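As a self-contained illustration of the matching scheme above (the file names here are placeholders, not from the source):

import cv2

orb = cv2.ORB_create()
img_a = cv2.imread('a.png', cv2.IMREAD_GRAYSCALE)
img_b = cv2.imread('b.png', cv2.IMREAD_GRAYSCALE)
_, desc_a = orb.detectAndCompute(img_a, None)
_, desc_b = orb.detectAndCompute(img_b, None)

# NORM_HAMMING suits ORB's binary descriptors; crossCheck=True keeps only
# mutual best matches, trading recall for precision
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = sorted(bf.match(desc_a, desc_b), key=lambda m: m.distance)
score = sum(m.distance for m in matches[:4])  # lower means more similar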
Example #3
def create_descriptions(dirname):
    """pre-compute a list of descriptors for ORB matching.
    
    :param dirname: directory to find the images to use
    :type dirname: str
    :return: (filename, descriptors) tuples
    :rtype: list of tuples
    """
    descs = []
    symbols = load_symbols(dirname)

    orb = cv2.ORB_create()

    for s in symbols:
        gray = convert.bgr_to_gray(s[1])
        _, desc = orb.detectAndCompute(gray, None)
        descs.append((s[0], desc))

    return descs
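How create_descriptions and match might be wired together; the directory and image path are placeholders, and io.read_color is assumed to be the repo's own loader seen in the other examples:

exemplar_descs = create_descriptions('./symbols')
test_img = io.read_color('./card.png')
print(match(test_img, exemplar_descs))  # name of the closest exemplar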
Example #4
def process(path):
    """process a single image: find each diamond and build its description.

    :param path: path to the image file
    :type path: str
    :return: the image's filename followed by one description dict per diamond
    :rtype: list
    """
    descriptions = []

    fname, ext = utils.get_fname_and_ext(path)
    descriptions.append(fname + ext)

    # read in the image as bgr
    color_img = io.read_color(path)

    # make a copy of the image in grayscale
    gray_img = convert.bgr_to_gray(color_img)

    #* FIND DIAMONDS
    # get the locations of the diamonds
    diamonds = features.get_diamonds(gray_img)

    for diamond in diamonds:
        # to hold the description for this diamond
        description = {}

        # isolate the current diamond from the original image
        isolated = features.isolate_diamond(color_img, diamond)

        # transform the diamond to a given position
        extracted = transforms.xform(
            isolated, convert.contour_to_ptarray(diamond), XFORM_DESTINATION)
        
        #* EXTRACT COLORS
        #TODO: employ some sort of white balancing and maybe kNN to improve color detection
        normalised = filters.normlise_intensity(extracted)
        # normalised = filters.white_balance(extracted)

        # remove white and black by creating a threshold mask
        thresh = cv2.inRange(convert.bgr_to_lab(
            normalised), (70, 0, 0), (200, 255, 255))

        # create the masks for the areas where the color will be extracted
        top_color_mask = features.mask_from_contours(
            TOP_COLOR_CNTS, thresh.shape)
        bot_color_mask = features.mask_from_contours(
            BOT_COLOR_CNTS, thresh.shape)
        # AND the masks with the thresh output to stop the blacks and
        # whites from skewing the average color
        top_color_mask = utils.combine_masks(top_color_mask, thresh)
        bot_color_mask = utils.combine_masks(bot_color_mask, thresh)

        # get the top color
        color = features.get_color(
            normalised, top_color_mask, COLOR_NAMES, COLOR_VALUES)
        description['top'] = color

        # get the bottom color
        color = features.get_color(
            normalised, bot_color_mask, COLOR_NAMES, COLOR_VALUES)
        description['bottom'] = color

        balanced = filters.white_balance(extracted)

        #* EXTRACT CLASS

        # crop to the general location of the class
        class_crop = utils.crop_to_contour(balanced, CLASS_CNTS2)

        words = ocr.extract_all_words(class_crop, filter='NUMS')

        label = ocr.determine_label(words, CLASS_DICTIONARY)

        description['class'] = label

        #* EXTRACT LABEL
        # crop to the general area of the text
        label_loc = utils.crop_to_contour(balanced, LABEL_CNTS)

        # find contours to isolate the areas text could be
        cnts, highlighted, found = ocr.find_text(label_loc)

        # if there were areas of text to investigate
        if found:
            # optimise the contours found to only include the important ones
            bbox = ocr.find_optimal_components_subset(cnts, highlighted)

            # crop to the optimal height, keep the entire width
            cropped = utils.crop_to_bbox(label_loc, bbox, ignore='x')

            # extract the words from the image
            words = ocr.extract_all_words(cropped, filter='CAPS')

            # determine the correct label from the words
            label = ocr.determine_label(words, LABEL_DICTIONARY)

            # try again with a larger crop
            if label == '(none)':
                cropped = utils.crop_to_bbox(label_loc, bbox, padding=(13, 13), ignore='x')
                words = ocr.extract_all_words(cropped)
                label = ocr.determine_label(words, LABEL_DICTIONARY)

            # add the label to the description
            description['text'] = label
        # no areas of text were found
        else:
            description['text'] = '(none)'

        #* EXTRACT SYMBOL
        symbols = utils.get_images_in_dir('./symbols')
        grab = utils.crop_to_bbox(extracted, (0, 0, 500, 250))

        # init to hold the best prediction
        prediction = ('(none)', np.inf)

        for s in symbols:
            s_img = io.read_color(s)
            matches = orb.quick_orb(grab, s_img)
            distances = [m.distance for m in matches]
            score = sum(distances[:4])
            if score == 0:
                score = np.inf
            if score < prediction[1]:
                prediction = (s, score)

        # get the filename of the symbol
        fname = utils.get_fname(prediction[0])
        # lookup the symbol name from the filename and add it to the description
        symbol = SYMBOL_LOOKUP[fname]
        description['symbol'] = symbol

        #* APPEND DESCRIPTION
        descriptions.append(description)
        # print(json.dumps(description, indent=4))
        # vis.show(balanced)

    return descriptions
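The helper orb.quick_orb is referenced above but not shown. A plausible sketch, assuming it computes ORB descriptors for both images and returns brute-force matches sorted by distance; this is a reconstruction, not the repo's actual helper:

import cv2

def quick_orb(img_a, img_b):
    """compute distance-sorted ORB matches between two BGR images."""
    orb = cv2.ORB_create()
    gray_a = cv2.cvtColor(img_a, cv2.COLOR_BGR2GRAY)
    gray_b = cv2.cvtColor(img_b, cv2.COLOR_BGR2GRAY)
    _, desc_a = orb.detectAndCompute(gray_a, None)
    _, desc_b = orb.detectAndCompute(gray_b, None)
    if desc_a is None or desc_b is None:
        return []  # no features detected in one of the images
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    return sorted(bf.match(desc_a, desc_b), key=lambda m: m.distance)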
Example #5
def find_text(image):
    """attempt to isolate the text in the image.
    
    :param image: input image
    :type image: cv2 image
    :return: contours, cropped image, if text is found
    :rtype: cv2 cnts, cv2 image, boolean
    """
    # convert to GRAYSCALE
    g_label = convert.bgr_to_gray(image)

    # binarise the image
    _, thresh = cv2.threshold(g_label, 127, 255,
                              cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # detect edges
    canny = features.apply_canny(thresh, 175, 190)

    # blur
    blur = filters.median_blur(canny, 3)

    # xor between canny and blur to attempt to remove some of the lines from the diamond
    text_im = cv2.bitwise_xor(canny, blur)

    # blur again
    text_blur = filters.apply_gaussian_blur(text_im, 3)

    # attempt to dilate the text so it is one big blob
    kernel_rect = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    dilated = cv2.dilate(text_blur, kernel_rect, iterations=6)

    # attempt to remove anything that is not vertical or horizontal, i.e. lines from the diamond
    kernel_cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (7, 7))
    erosion = cv2.erode(dilated, kernel_cross, iterations=2)

    # threshold the image again
    _, thresh_dilated = cv2.threshold(erosion, 127, 255, cv2.THRESH_BINARY)

    # blur again
    td_blur = filters.median_blur(thresh_dilated, 9)

    # dilate the text into itself
    kernel_rect2 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 2))
    td_dilate = cv2.dilate(td_blur, kernel_rect2, iterations=10)

    # repeatedly blur and re-dilate to merge the text into a single blob
    for _ in range(12):
        td_dilate = filters.apply_gaussian_blur(td_dilate, 5)
        td_dilate = cv2.dilate(td_dilate, kernel_rect2, iterations=10)

    kernel_rect3 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 8))
    td_dilate = cv2.dilate(td_dilate, kernel_rect3, iterations=3)

    # find contours that encompass the areas text could be
    _, cnts, _ = cv2.findContours(td_dilate.copy(), cv2.RETR_EXTERNAL,
                                  cv2.CHAIN_APPROX_SIMPLE)

    # report whether contours were found
    if cnts:
        found = True
        # optimise the contours found to only include the important ones
        bbox = find_optimal_components_subset(cnts, td_dilate)
        # crop to the optimal height, keep the entire width
        im = utils.crop_to_bbox(image, bbox, ignore='x')
    else:
        found = False
        im = image

    return im, found
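utils.crop_to_bbox with ignore='x' appears throughout these examples. A minimal sketch of the assumed behaviour: crop to an (x, y, w, h) box with optional padding, keeping the full width when the x axis is ignored. The padding semantics are a guess, not the repo's code:

def crop_to_bbox(image, bbox, padding=(0, 0), ignore=None):
    x, y, w, h = bbox
    px, py = padding
    y0, y1 = max(y - py, 0), min(y + h + py, image.shape[0])
    if ignore == 'x':
        # keep the entire width, crop only vertically
        return image[y0:y1, :]
    x0, x1 = max(x - px, 0), min(x + w + px, image.shape[1])
    return image[y0:y1, x0:x1]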