def DetectTable():
    content = request.json  # request payload (currently unused; the URL below is hard-coded)
    download_url = "https://tableextractor.blob.core.windows.net/extracted-images/1557478921912_Page%2001.png"

    # Download the page image and store it locally.
    response = urllib.request.urlopen(download_url)
    imageName = download_url.split('/')[-1]
    with open("ExtracedTables/" + imageName, 'wb') as file:
        file.write(response.read())

    image = cv.imread("ExtracedTables/" + imageName, 1)
    imageCopy = image.copy()  # keep an untouched copy for cropping the detected tables

    # Pre-process the image and extract its line structure.
    ippObj = ipp.ImagePreProcessing()
    image = ippObj.GammaAdujst(image)
    image = ippObj.Threshholding(image, 21)
    (contours, intersections) = ippObj.StructureExtraction(image, 10)

    # Get tables from the images
    tables = []  # list of tables
    for i in range(len(contours)):
        # Verify that the region of interest is a table
        (rect, table_joints) = tableutils.verify_table(contours[i], intersections)
        if rect is None or table_joints is None:
            continue

        # Create a new instance of a table
        table = TableStructure(rect[0], rect[1], rect[2], rect[3])

        # Get an n-dimensional array of the coordinates of the table joints
        joint_coords = []
        for j in range(len(table_joints)):
            joint_coords.append(table_joints[j][0][0])
        joint_coords = np.asarray(joint_coords)

        # Returns indices of coordinates in sorted order.
        # Sorts based on parameters (aka keys) starting from the last parameter, then second-to-last, etc.
        sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
        joint_coords = joint_coords[sorted_indices]

        # Store joint coordinates in the table instance
        table.set_joints(joint_coords)
        tables.append(table)

    te = TableExtraction()
    tables = te.tableSort(tables)
    images = te.ExtractTable(imageCopy, tables)
    cv.imwrite("ExtracedTables/" + imageName, images)
    result = te.StoreExtractedTable(images)
    return result
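
# The np.lexsort idiom above (and in the snippets that follow) sorts the joint
# coordinates top-to-bottom, then left-to-right, because lexsort treats its
# LAST key as the primary one. A small standalone example with made-up
# coordinates illustrates the resulting row-major order:
import numpy as np

joint_coords = np.array([[548, 119], [72, 31], [913, 119], [285, 31], [72, 119]])
sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))  # keys: x (secondary), y (primary)
print(joint_coords[sorted_indices])
# [[ 72  31]
#  [285  31]
#  [ 72 119]
#  [548 119]
#  [913 119]]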
def extract(image):
    mask, horizontal, vertical = get_grid_mask(image)
    contours, _ = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    # Find intersections between the lines to determine if the intersections are table joints.
    intersections = cv.bitwise_and(horizontal, vertical)

    tables = []
    for table_number, contour in enumerate(contours):
        # verify that Region of Interest (ROI) is a table
        rect = verify_table(contour, intersections)
        if rect is None:
            continue

        corners = find_corners_from_contour(contour)
        table_image = crop_and_warp(image, corners)

        # Artificially add outer borders; some images have none, which would
        # otherwise cause the outermost columns to be omitted.
        table_image = add_border_padding(table_image, w=(2, 2, 2, 4), color=(100, 100, 100))
        cv.imwrite('out/final_image.jpg', table_image)

        # Find table joints (line intersections) for the warped table.
        m, h, v = get_grid_mask(table_image)
        table_intersections = cv.bitwise_and(h, v)
        intersection_points = find_intersection_mean_cords(table_intersections)
        if len(intersection_points) < 5:
            continue

        table = Table(table_image, intersection_points)
        table.build()
        tables.append(table)

    return tables
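
# get_grid_mask() is not shown in these excerpts. Judging from the other
# snippets in this section (adaptive thresholding followed by morphological
# line isolation), a minimal sketch could look like the one below; the scale,
# block size, and the erode/dilate sequence are assumptions, not the actual
# implementation.
import cv2 as cv

def get_grid_mask(image, scale=15):
    """Return (mask, horizontal, vertical) line images for a table grid."""
    if len(image.shape) == 3:
        image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    # Invert and binarize so the ruling lines become white foreground pixels.
    filtered = cv.adaptiveThreshold(~image, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                                    cv.THRESH_BINARY, 15, 0)

    horizontal = filtered.copy()
    vertical = filtered.copy()

    # Keep only long horizontal runs: erode, then dilate with a wide kernel.
    h_size = max(1, horizontal.shape[1] // scale)
    h_kernel = cv.getStructuringElement(cv.MORPH_RECT, (h_size, 1))
    horizontal = cv.dilate(cv.erode(horizontal, h_kernel), h_kernel)

    # Same idea for vertical lines, with a tall kernel.
    v_size = max(1, vertical.shape[0] // scale)
    v_kernel = cv.getStructuringElement(cv.MORPH_RECT, (1, v_size))
    vertical = cv.dilate(cv.erode(vertical, v_kernel), v_kernel)

    return horizontal + vertical, horizontal, vertical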
def get_cells(self, ori_img, table_coords, debug=False) -> List[np.ndarray]:
    # raise RuntimeError("Debugging of this module is unfinished! Please use another method to obtain the table boxes!")
    cells = []
    for coord in table_coords:  # for each bordered table
        table_cell = []
        xmin, ymin, xmax, ymax = [int(k) for k in coord]  # used for cropping & shifting
        table_img = ori_img[ymin:ymax, xmin:xmax]

        with utils.Timer("Traditional Cell Detection"):
            grayscale = cv.cvtColor(table_img, cv.COLOR_BGR2GRAY)
            filtered = cv.adaptiveThreshold(~grayscale, self.MAX_THRESHOLD_VALUE,
                                            cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY,
                                            self.BLOCK_SIZE, self.THRESHOLD_CONSTANT)
            if debug:
                cv.namedWindow('filtered', 0)
                cv.resizeWindow('filtered', 900, 700)
                cv.imshow('filtered', filtered)
                cv.waitKey(0)

            # Isolate horizontal and vertical lines using morphological operations.
            horizontal = filtered.copy()
            vertical = filtered.copy()

            horizontal_size = int(horizontal.shape[1] / self.SCALE)
            horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1))
            utils.isolate_lines(horizontal, horizontal_structure)

            vertical_size = int(vertical.shape[0] / self.SCALE)
            vertical_structure = cv.getStructuringElement(cv.MORPH_RECT, (1, vertical_size))
            utils.isolate_lines(vertical, vertical_structure)

            mask = horizontal + vertical
            if debug:
                cv.namedWindow('mask', 0)
                cv.resizeWindow('mask', 900, 700)
                cv.imshow('mask', mask)
                cv.waitKey(0)

            (contours, _) = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
            intersections = cv.bitwise_and(horizontal, vertical)
            if debug:
                cv.namedWindow('intersections', 0)
                cv.resizeWindow('intersections', 900, 700)
                cv.imshow('intersections', intersections)
                cv.waitKey(0)

            for i in range(len(contours)):
                # Verify that region of interest is a table
                (rect, table_joints) = utils.verify_table(contours[i], intersections)
                if rect is None or table_joints is None:
                    continue

                # Create a new instance of a table
                table = TraditionalTable(rect[0], rect[1], rect[2], rect[3])

                # Get an n-dimensional array of the coordinates of the table joints
                joint_coords = []
                for j in range(len(table_joints)):
                    joint_coords.append(table_joints[j][0][0])
                joint_coords = np.asarray(joint_coords)

                # Returns indices of coordinates in sorted order.
                # Sorts based on parameters (aka keys) starting from the last parameter, then second-to-last, etc.
                # joint_coords:
                # [[913 179], [695 179], [548 179], [285 179], [182 179],
                #  [ 72 179], [913 119], [695 119], [548 119], [285 119],
                #  [182 119], [ 72 119], [913  31], [695  31], [548  31],
                #  [457  31], [376  31], [285  31], [182  31], [ 72  31],
                #  [913   0], [695   0], [548   0], ... ]
                sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
                joint_coords = joint_coords[sorted_indices]

                # Store joint coordinates in the table instance
                table.set_joints(joint_coords)

                table_entries = table.get_table_entries()
                for k in range(len(table_entries)):
                    row = table_entries[k]
                    for j in range(len(row)):
                        entry = row[j]  # xyxy
                        table_cell.append([
                            entry[0], entry[1], entry[2], entry[1],
                            entry[2], entry[3], entry[0], entry[3]
                        ])  # xyxyxyxy

        cells.append(np.array(table_cell))
    return cells
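
# utils.isolate_lines() is called in several of these snippets but never
# defined. A common in-place implementation (an assumption here, not the
# project's actual code) is an erosion followed by a dilation with the given
# structuring element, which removes everything except lines matching the
# kernel's orientation:
import cv2 as cv

def isolate_lines(src, structuring_element):
    """In-place sketch: erode to keep only kernel-aligned lines, then dilate
    to restore the surviving lines to roughly their original thickness."""
    cv.erode(src, structuring_element, src)   # dst=src, so src is modified in place
    cv.dilate(src, structuring_element, src)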
# Create an image mask with just the horizontal
# and vertical lines in the image. Then find
# all contours in the mask.
mask = horizontal + vertical
# Note: OpenCV 4.x returns (contours, hierarchy); the original three-value
# unpacking only works with OpenCV 3.x.
contours, _ = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

# Find intersections between the lines
# to determine if the intersections are table joints.
intersections = cv.bitwise_and(horizontal, vertical)

# Get tables from the images
tables = []  # list of tables
for i in range(len(contours)):
    # Verify that region of interest is a table
    (rect, table_joints) = utils.verify_table(contours[i], intersections)
    if rect is None or table_joints is None:
        continue

    # Create a new instance of a table
    table = Table(rect[0], rect[1], rect[2], rect[3])

    # Get an n-dimensional array of the coordinates of the table joints
    joint_coords = []
    for j in range(len(table_joints)):
        joint_coords.append(table_joints[j][0][0])
    joint_coords = np.asarray(joint_coords)

    # Returns indices of coordinates in sorted order.
    # Sorts based on parameters (aka keys) starting from the last parameter, then second-to-last, etc.
    sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
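
# utils.verify_table() appears throughout this section but is not included in
# the excerpts. The sketch below shows the usual shape of such a check
# (minimum contour area, then a minimum number of joint intersections inside
# the bounding rectangle); the thresholds and exact findContours flags are
# assumptions.
import cv2 as cv

MIN_TABLE_AREA = 50   # assumed minimum contour area for a table candidate
EPSILON = 3           # assumed polygon-approximation tolerance

def verify_table(contour, intersections):
    """Return (bounding_rect, joint_contours) if the contour looks like a
    table, otherwise (None, None)."""
    if cv.contourArea(contour) < MIN_TABLE_AREA:
        return None, None

    # Approximate the contour and take its bounding rectangle (x, y, w, h).
    curve = cv.approxPolyDP(contour, EPSILON, True)
    rect = cv.boundingRect(curve)

    # Count line intersections ("joints") inside the rectangle; a real table
    # should contain several of them.
    x, y, w, h = rect
    region = intersections[y:y + h, x:x + w]
    joints, _ = cv.findContours(region, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE)
    if len(joints) < 5:
        return None, None

    return rect, joints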
def conversion_algorithm(path):
    # scanning the image, applying perspective warping
    # and adaptive thresholding
    filtered, warped = scan_img(path)

    # line isolation
    SCALE = 23

    # isolate horizontal and vertical lines using morphological operations
    horizontal = filtered.copy()
    vertical = filtered.copy()

    horizontal_size = int(horizontal.shape[1] / SCALE)
    horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1))
    utils.isolate_lines(horizontal, horizontal_structure)

    vertical_size = int(vertical.shape[0] / SCALE)
    vertical_structure = cv.getStructuringElement(cv.MORPH_RECT, (1, vertical_size))
    utils.isolate_lines(vertical, vertical_structure)

    # TABLE EXTRACTION
    # create an image mask with just the horizontal
    # and vertical lines in the image. Then find
    # all contours in the mask.
    mask = horizontal + vertical
    cv.imwrite("processing_data/detected_lines.jpg", mask)
    (contours, _) = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    # find intersections between the lines
    # to determine if the intersections are table joints.
    intersections = cv.bitwise_and(horizontal, vertical)

    # get tables from the images
    tables = []  # list of tables
    for i in range(len(contours)):
        # verify that region of interest is a table
        (rect, table_joints) = utils.verify_table(contours[i], intersections)
        if rect is None or table_joints is None:
            continue

        # create a new instance of a table
        table = Table(rect[0], rect[1], rect[2], rect[3])

        # get an n-dimensional array of the coordinates of the table joints
        joint_coords = []
        for j in range(len(table_joints)):
            joint_coords.append(table_joints[j][0][0])
        joint_coords = np.asarray(joint_coords)

        # returns indices of coordinates in sorted order.
        # sorts based on parameters (aka keys) starting from the last parameter, then second-to-last, etc.
        sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
        joint_coords = joint_coords[sorted_indices]

        # store joint coordinates in the table instance
        table.set_joints(joint_coords)
        tables.append(table)

        # draw the detected table boundary on the warped image
        cv.rectangle(warped, (table.x, table.y),
                     (table.x + table.w, table.y + table.h), (0, 255, 0), 1, 8, 0)

    cv.imwrite("processing_data/table_boundaries.jpg", warped)
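
# Table.set_joints() and get_table_entries() are not part of these excerpts.
# Judging by the sorted joint_coords shown earlier (joints sharing a y value
# form one row), set_joints most likely groups the lexsort-ordered joints into
# rows. The helper below is only an illustration of that grouping under that
# assumption, not the actual method:
import numpy as np

def group_joints_into_rows(joint_coords, y_tolerance=5):
    """Group lexsort-ordered (x, y) joints into rows: consecutive joints whose
    y values differ by at most y_tolerance are placed in the same row."""
    rows = []
    current_row = [joint_coords[0]]
    for point in joint_coords[1:]:
        if abs(int(point[1]) - int(current_row[-1][1])) <= y_tolerance:
            current_row.append(point)
        else:
            rows.append(np.array(current_row))
            current_row = [point]
    rows.append(np.array(current_row))
    return rows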
def find_table(image):
    # Convert resized RGB image to grayscale
    NUM_CHANNELS = 3
    if len(image.shape) == NUM_CHANNELS:
        grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    else:
        grayscale = image  # already single-channel

    # =====================================================
    # IMAGE FILTERING (using adaptive thresholding)
    # =====================================================
    MAX_THRESHOLD_VALUE = 255
    BLOCK_SIZE = 15
    THRESHOLD_CONSTANT = 0

    # Filter image
    filtered = cv.adaptiveThreshold(~grayscale, MAX_THRESHOLD_VALUE,
                                    cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY,
                                    BLOCK_SIZE, THRESHOLD_CONSTANT)

    # =====================================================
    # LINE ISOLATION
    # =====================================================
    SCALE = 15

    # Isolate horizontal and vertical lines using morphological operations
    horizontal = filtered.copy()
    vertical = filtered.copy()

    horizontal_size = int(horizontal.shape[1] / SCALE)
    horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1))
    utils.isolate_lines(horizontal, horizontal_structure)

    vertical_size = int(vertical.shape[0] / SCALE)
    vertical_structure = cv.getStructuringElement(cv.MORPH_RECT, (1, vertical_size))
    utils.isolate_lines(vertical, vertical_structure)

    # =====================================================
    # TABLE EXTRACTION
    # =====================================================
    # Create an image mask with just the horizontal
    # and vertical lines in the image. Then find
    # all contours in the mask.
    mask = horizontal + vertical
    (contours, _) = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    # Find intersections between the lines
    # to determine if the intersections are table joints.
    intersections = cv.bitwise_and(horizontal, vertical)

    # Get tables from the images
    tables = []  # list of tables
    for i in range(len(contours)):
        # Verify that region of interest is a table
        (rect, table_joints) = utils.verify_table(contours[i], intersections)
        if rect is None or table_joints is None:
            continue

        # Create a new instance of a table
        table = Table(rect[0], rect[1], rect[2], rect[3])

        # Get an n-dimensional array of the coordinates of the table joints
        joint_coords = []
        for j in range(len(table_joints)):
            joint_coords.append(table_joints[j][0][0])
        joint_coords = np.asarray(joint_coords)

        # Returns indices of coordinates in sorted order.
        # Sorts based on parameters (aka keys) starting from the last parameter, then second-to-last, etc.
        sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
        joint_coords = joint_coords[sorted_indices]

        # Store joint coordinates in the table instance
        table.set_joints(joint_coords)
        tables.append(table)

    # tables is already an empty list when nothing was detected
    return tables
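
# A minimal way to exercise find_table(), assuming the Table instances expose
# the x, y, w, h attributes used in the previous snippet; the file names here
# are placeholders:
import cv2 as cv

image = cv.imread("page.png")           # placeholder input path
tables = find_table(image)

# Draw each detected table's bounding box for a quick visual check.
for table in tables:
    cv.rectangle(image, (table.x, table.y),
                 (table.x + table.w, table.y + table.h), (0, 255, 0), 2)
cv.imwrite("page_tables.png", image)    # placeholder output path
print(f"Detected {len(tables)} table(s)")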