def extract_all_words(image, filter='CAPS'):
    """Extract all of the words from the image.

    Runs OCR on both the normal and the inverse threshold of the input
    image and collects the results, because pytesseract prefers black
    text on a white background and at least one of the two variants
    will have that.

    :param image: input image
    :type image: cv2 image
    :param filter: which extractor to use, 'CAPS' or 'NUMS'
    :type filter: str
    :return: extracted words, one entry per threshold variant
        (None when *filter* is not recognised, as before)
    :rtype: list
    """
    gray = convert.bgr_to_gray(image)
    # standard and inverse threshold so that at least one of the images
    # is black text on a white background -- tesseract likes this more
    _, thresh = cv2.threshold(gray, 127, 255,
                              cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    _, thresh_inv = cv2.threshold(gray, 127, 255,
                                  cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    variations = [thresh, thresh_inv]
    # BUG FIX: the original compared strings with 'is', which tests object
    # identity, not equality -- it only worked via CPython string interning
    if filter == 'CAPS':
        return [get_caps(v) for v in variations]
    elif filter == 'NUMS':
        return [get_nums(v) for v in variations]
    # unknown filter: preserve the original implicit-None behaviour
    return None
def match(test, exemplar_descs):
    """Find the exemplar whose ORB descriptors best match *test*.

    :param test: image to test
    :type test: cv2 image
    :param exemplar_descs: (name, descriptors) pair for each exemplar
    :type exemplar_descs: list of tuples
    :return: name of the closest exemplar, or '(none)' if nothing matched
    :rtype: str
    """
    # compute ORB descriptors for the grayscale test image
    orb = cv2.ORB_create()
    _, descs = orb.detectAndCompute(convert.bgr_to_gray(test), None)
    # brute-force Hamming matcher with cross-checking
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    best_name, best_score = '(none)', np.inf
    for name, exemplar_desc in exemplar_descs:
        # match distances, smallest (closest) first
        distances = sorted(m.distance for m in bf.match(exemplar_desc, descs))
        # score on the four closest matches; an empty/zero sum means no
        # usable matches, which counts as the worst possible score
        score = sum(distances[:4]) or np.inf
        if score < best_score:
            best_name, best_score = name, score
    return best_name
def create_descriptions(dirname):
    """Pre-compute ORB descriptors for every symbol image in *dirname*.

    :param dirname: directory to find the images to use
    :type dirname: str
    :return: (filename, descriptors) pair per symbol
    :rtype: list of tuples
    """
    orb = cv2.ORB_create()
    descriptions = []
    for name, img in load_symbols(dirname):
        # ORB operates on single-channel images
        _, desc = orb.detectAndCompute(convert.bgr_to_gray(img), None)
        descriptions.append((name, desc))
    return descriptions
def process(path):
    """Build a description dict for every diamond found in the image at *path*.

    :param path: path to the image file to process
    :type path: str
    :return: the image's filename followed by one description dict per diamond
    :rtype: list
    """
    descriptions = []
    fname, ext = utils.get_fname_and_ext(path)
    descriptions.append(fname + ext)
    # read in the image as bgr
    color_img = io.read_color(path)
    # make a copy of the image in grayscale
    gray_img = convert.bgr_to_gray(color_img)

    #* FIND DIAMONDS
    # get the locations of the diamonds
    diamonds = features.get_diamonds(gray_img)
    for diamond in diamonds:
        # to hold the description for this diamond
        description = {}
        # isolate the current diamond from the original image
        isolated = features.isolate_diamond(color_img, diamond)
        # transform the diamond to a given position
        extracted = transforms.xform(
            isolated, convert.contour_to_ptarray(diamond), XFORM_DESTINATION)

        #* EXTRACT COLORS
        # TODO: employ some sort of white balancing and maybe kNN to
        # improve color detection
        normalised = filters.normlise_intensity(extracted)
        # remove white and black by creating a threshold mask
        thresh = cv2.inRange(
            convert.bgr_to_lab(normalised), (70, 0, 0), (200, 255, 255))
        # create the masks for the areas where the color will be extracted
        top_color_mask = features.mask_from_contours(
            TOP_COLOR_CNTS, thresh.shape)
        bot_color_mask = features.mask_from_contours(
            BOT_COLOR_CNTS, thresh.shape)
        # AND the masks with the thresh output so the blacks and whites
        # do not skew the average color
        top_color_mask = utils.combine_masks(top_color_mask, thresh)
        bot_color_mask = utils.combine_masks(bot_color_mask, thresh)
        # get the top color
        description['top'] = features.get_color(
            normalised, top_color_mask, COLOR_NAMES, COLOR_VALUES)
        # get the bottom color
        description['bottom'] = features.get_color(
            normalised, bot_color_mask, COLOR_NAMES, COLOR_VALUES)

        balanced = filters.white_balance(extracted)

        #* EXTRACT CLASS
        # crop to the general location of the class
        class_crop = utils.crop_to_contour(balanced, CLASS_CNTS2)
        words = ocr.extract_all_words(class_crop, filter='NUMS')
        # NOTE(review): 'l' is not defined anywhere in this function --
        # confirm which variable was intended before relying on this path
        label = ocr.determine_label(words + l, CLASS_DICTIONARY)
        description['class'] = label

        #* EXTRACT LABEL
        # crop to the general area of the text
        label_loc = utils.crop_to_contour(balanced, LABEL_CNTS)
        # find contours to isolate the areas text could be
        cnts, highlighted, found = ocr.find_text(label_loc)
        # if there were areas of text to investigate
        if found:
            # optimise the contours found to only include the important ones
            bbox = ocr.find_optimal_components_subset(cnts, highlighted)
            # crop to the optimal height, keep the entire width
            cropped = utils.crop_to_bbox(label_loc, bbox, ignore='x')
            # extract the words from the image
            words = ocr.extract_all_words(cropped, filter='CAPS')
            # determine the correct label from the words
            label = ocr.determine_label(words, LABEL_DICTIONARY)
            # BUG FIX: the original used 'is' (identity) to compare strings;
            # '==' guarantees the retry triggers whenever no label was found
            if label == '(none)':
                # try again with a larger crop
                cropped = utils.crop_to_bbox(
                    label_loc, bbox, padding=(13, 13), ignore='x')
                words = ocr.extract_all_words(cropped)
                label = ocr.determine_label(words, LABEL_DICTIONARY)
            # add the label to the description
            description['text'] = label
        else:
            # no text areas found at all
            description['text'] = '(none)'

        #* EXTRACT SYMBOL
        symbols = utils.get_images_in_dir('./symbols')
        grab = utils.crop_to_bbox(extracted, (0, 0, 500, 250))
        # init to hold the best prediction
        prediction = ('(none)', np.inf)
        for s in symbols:
            s_img = io.read_color(s)
            matches = orb.quick_orb(grab, s_img)
            # BUG FIX: the distances were previously summed in arbitrary
            # (unsorted) order; sort first so the score uses the four
            # CLOSEST matches, consistent with match() above
            distances = sorted(m.distance for m in matches)
            score = sum(distances[:4])
            # no matches at all -> worst possible score
            if score == 0:
                score = np.inf
            # update prediction because this match is closer
            if score < prediction[1]:
                prediction = (s, score)
        # get the filename of the symbol
        fname = utils.get_fname(prediction[0])
        # lookup the symbol name from the filename and add it to the
        # description
        description['symbol'] = SYMBOL_LOOKUP[fname]

        #* APPEND DESCRIPTION
        descriptions.append(description)
    return descriptions
def find_text(image):
    """Attempt to isolate the text in the image.

    :param image: input image
    :type image: cv2 image
    :return: contours, highlighted (dilated) image, whether text was found
    :rtype: cv2 cnts, cv2 image, boolean
    """
    # convert to GRAYSCALE
    g_label = convert.bgr_to_gray(image)
    # binarise the image
    _, thresh = cv2.threshold(g_label, 127, 255,
                              cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # detect edges
    canny = features.apply_canny(thresh, 175, 190)
    # blur
    blur = filters.median_blur(canny, 3)
    # xor between canny and blur to attempt to remove some of the lines
    # from the diamond
    text_im = cv2.bitwise_xor(canny, blur)
    # blur again
    text_blur = filters.apply_gaussian_blur(text_im, 3)
    # attempt to dilate the text so it is one big blob
    kernel_rect = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    dilated = cv2.dilate(text_blur, kernel_rect, iterations=6)
    # attempt to remove anything that is not vertical or horizontal,
    # i.e. lines from the diamond
    kernel_cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (7, 7))
    erosion = cv2.erode(dilated, kernel_cross, iterations=2)
    # threshold the image again
    _, thresh_dilated = cv2.threshold(erosion, 127, 255, cv2.THRESH_BINARY)
    # blur again
    td_blur = filters.median_blur(thresh_dilated, 9)
    # dilate the text into itself
    kernel_rect2 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 2))
    td_dilate = cv2.dilate(td_blur, kernel_rect2, iterations=10)
    # repeatedly blur and re-dilate so neighbouring blobs merge
    for _ in range(12):
        td_dilate = filters.apply_gaussian_blur(td_dilate, 5)
        # BUG FIX: the original dilated td_blur here, discarding the blur
        # just applied and making every loop iteration produce the same
        # result; dilate the evolving td_dilate instead
        td_dilate = cv2.dilate(td_dilate, kernel_rect2, iterations=10)
    kernel_rect3 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 8))
    td_dilate = cv2.dilate(td_dilate, kernel_rect3, iterations=3)
    # find contours that encompass the areas text could be
    # (3-value return is the OpenCV 3.x findContours API, as used elsewhere)
    _, cnts, _ = cv2.findContours(td_dilate.copy(), cv2.RETR_EXTERNAL,
                                  cv2.CHAIN_APPROX_SIMPLE)
    # report whether any candidate text areas were found
    found = bool(cnts)
    # BUG FIX: the original returned only (im, found), but both the
    # docstring and the caller in process() expect three values
    # (contours, highlighted image, found); the caller performs its own
    # find_optimal_components_subset + crop on the returned pair, exactly
    # as this function previously did internally with td_dilate
    return cnts, td_dilate, found