def StructureExtraction(self, image, sc=10):
        """Extract the line structure (contours and joints) of an image.

        Horizontal and vertical lines are isolated on two separate copies of
        ``image`` with ``utils.isolate_lines``. The union of both line images
        is searched for external contours, and the overlap of both line
        images is returned as the candidate table joints.

        Args:
            image: Image array exposing ``copy()`` and ``shape``; only copies
                are processed. Presumably an already binarised single-channel
                image -- confirm against the caller.
            sc: Divisor applied to the image width (height) to obtain the
                length of the horizontal (vertical) structuring element.

        Returns:
            Tuple ``(contours, intersections)``: the external contours of the
            combined line mask, and the bitwise AND of the horizontal and
            vertical line images.
        """
        # Isolate horizontal and vertical lines using morphological operations
        horizontal = image.copy()
        vertical = image.copy()

        # A structuring element needs at least one pixel per side, so the
        # length is clamped for images smaller than ``sc`` pixels.
        horizontal_size = max(1, int(horizontal.shape[1] / sc))
        horizontal_structure = cv.getStructuringElement(
            cv.MORPH_RECT, (horizontal_size, 1))
        utils.isolate_lines(horizontal, horizontal_structure)

        vertical_size = max(1, int(vertical.shape[0] / sc))
        vertical_structure = cv.getStructuringElement(
            cv.MORPH_RECT, (1, vertical_size))
        utils.isolate_lines(vertical, vertical_structure)

        # Mask holding every horizontal and vertical line. A bitwise OR is
        # used instead of ``+`` because uint8 addition wraps around on
        # overflow, which can darken or even erase pixels where lines cross.
        mask = cv.bitwise_or(horizontal, vertical)

        # ``findContours`` returns (contours, hierarchy) or, on OpenCV 3.x,
        # (image, contours, hierarchy); index -2 is the contours either way.
        contours = cv.findContours(mask, cv.RETR_EXTERNAL,
                                   cv.CHAIN_APPROX_SIMPLE)[-2]

        # Pixels shared by a horizontal and a vertical line are the
        # candidate table joints.
        intersections = cv.bitwise_and(horizontal, vertical)
        return contours, intersections
Exemplo n.º 2
0
    def get_cells(self,
                  ori_img,
                  table_coords,
                  debug=False) -> List[np.ndarray]:
        """Detect the cells of each bordered table with line morphology.

        Every box in ``table_coords`` is cropped out of ``ori_img``,
        binarised with an adaptive threshold and reduced to its horizontal
        and vertical lines. Contours of the combined line mask that
        ``utils.verify_table`` accepts are turned into ``TraditionalTable``
        instances, whose entries are collected as cell polygons.

        Args:
            ori_img: Image indexed as ``[y, x]``; the crop is converted with
                ``cv.COLOR_BGR2GRAY``, so a BGR colour image is expected.
            table_coords: Iterable of ``(xmin, ymin, xmax, ymax)`` boxes;
                each value is cast to ``int`` before cropping.
            debug: When true, the intermediate images are shown in windows
                and processing blocks until a key is pressed.

        Returns:
            One array per box in ``table_coords``. Each row holds the eight
            values ``x1, y1, x2, y1, x2, y2, x1, y2`` built from one table
            entry ``(x1, y1, x2, y2)``. The coordinates are used exactly as
            ``TraditionalTable.get_table_entries`` returns them; no shift by
            ``(xmin, ymin)`` is applied here.
        """
        # NOTE: the original author left a disabled RuntimeError here saying
        # that this module is not fully debugged and that another method of
        # obtaining the table boxes should be chosen.

        def show_debug(title, img):
            # Display an intermediate image and wait for a key press.
            cv.namedWindow(title, 0)
            cv.resizeWindow(title, 900, 700)
            cv.imshow(title, img)
            cv.waitKey(0)

        cells = []
        for coord in table_coords:  # for each bordered table
            table_cell = []
            xmin, ymin, xmax, ymax = [int(k) for k in coord]
            table_img = ori_img[ymin:ymax, xmin:xmax]
            with utils.Timer("Traditional Cell Detection"):
                grayscale = cv.cvtColor(table_img, cv.COLOR_BGR2GRAY)
                # Threshold the inverted image so that dark lines become
                # foreground pixels.
                filtered = cv.adaptiveThreshold(~grayscale,
                                                self.MAX_THRESHOLD_VALUE,
                                                cv.ADAPTIVE_THRESH_MEAN_C,
                                                cv.THRESH_BINARY,
                                                self.BLOCK_SIZE,
                                                self.THRESHOLD_CONSTANT)
                if debug:
                    show_debug('filtered', filtered)

                horizontal = filtered.copy()
                vertical = filtered.copy()

                # A structuring element needs at least one pixel per side,
                # so the length is clamped for crops smaller than SCALE.
                horizontal_size = max(
                    1, int(horizontal.shape[1] / self.SCALE))
                horizontal_structure = cv.getStructuringElement(
                    cv.MORPH_RECT, (horizontal_size, 1))
                utils.isolate_lines(horizontal, horizontal_structure)

                vertical_size = max(1, int(vertical.shape[0] / self.SCALE))
                vertical_structure = cv.getStructuringElement(
                    cv.MORPH_RECT, (1, vertical_size))
                utils.isolate_lines(vertical, vertical_structure)

                # Bitwise OR instead of ``+``: uint8 addition wraps around
                # on overflow where the two line images overlap.
                mask = cv.bitwise_or(horizontal, vertical)
                if debug:
                    show_debug('mask', mask)

                # Index -2 is the contour list for both the two-value and
                # the three-value (OpenCV 3.x) return of ``findContours``.
                contours = cv.findContours(mask, cv.RETR_EXTERNAL,
                                           cv.CHAIN_APPROX_SIMPLE)[-2]

                # Pixels shared by both line images are candidate joints.
                intersections = cv.bitwise_and(horizontal, vertical)
                if debug:
                    show_debug('intersections', intersections)

                for contour in contours:
                    # Verify that the region of interest is a table
                    (rect, table_joints) = utils.verify_table(
                        contour, intersections)
                    # Identity checks: ``== None`` is ambiguous (or raises)
                    # if either value is a NumPy array.
                    if rect is None or table_joints is None:
                        continue

                    table = TraditionalTable(rect[0], rect[1], rect[2],
                                             rect[3])

                    # One coordinate pair per joint: the first point of
                    # each joint, e.g. [[913 179], [695 179], ...].
                    joint_coords = np.asarray(
                        [joint[0][0] for joint in table_joints])

                    # lexsort uses the LAST key as the primary one: order
                    # by column 1 first, then by column 0 within ties.
                    sorted_indices = np.lexsort(
                        (joint_coords[:, 0], joint_coords[:, 1]))
                    joint_coords = joint_coords[sorted_indices]

                    # Store joint coordinates in the table instance
                    table.set_joints(joint_coords)

                    for row in table.get_table_entries():
                        for entry in row:  # (x1, y1, x2, y2)
                            table_cell.append([
                                entry[0], entry[1], entry[2], entry[1],
                                entry[2], entry[3], entry[0], entry[3]
                            ])  # four corner points, flattened
            cells.append(np.array(table_cell))

        return cells
Exemplo n.º 3
0
To isolate the vertical and horizontal lines:

1. Set a scale.
2. Create a structuring element.
3. Isolate the lines by eroding and then dilating the image.
"""
# Divisor applied to the image width/height to get the length of the
# structuring elements used for line isolation.
SCALE = 15

# Isolate horizontal and vertical lines using morphological operations
horizontal = filtered.copy()
vertical = filtered.copy()

# A structuring element needs at least one pixel per side, so the length
# is clamped for images smaller than SCALE pixels.
horizontal_size = max(1, int(horizontal.shape[1] / SCALE))
horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                (horizontal_size, 1))
utils.isolate_lines(horizontal, horizontal_structure)

vertical_size = max(1, int(vertical.shape[0] / SCALE))
vertical_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                              (1, vertical_size))
utils.isolate_lines(vertical, vertical_structure)

# =====================================================
# TABLE EXTRACTION
# =====================================================
# Create an image mask with just the horizontal
# and vertical lines in the image. Then find
# all contours in the mask.
# A bitwise OR is used instead of ``+`` because uint8 addition wraps
# around on overflow where the two line images overlap.
mask = cv.bitwise_or(horizontal, vertical)
# ``findContours`` returns (image, contours, hierarchy) on OpenCV 3.x and
# (contours, hierarchy) otherwise; index -2 is the contours in both cases,
# whereas unpacking three values fails on the two-value versions.
contours = cv.findContours(mask, cv.RETR_EXTERNAL,
                           cv.CHAIN_APPROX_SIMPLE)[-2]
def conversion_algorithm(path):
    """Detect tables in the image at ``path`` and save diagnostic images.

    ``scan_img(path)`` supplies a thresholded image and a warped image (per
    the original comments it scans the image and applies perspective warping
    and adaptive thresholding). Horizontal and vertical lines are isolated
    from the thresholded image, contours of the combined line mask are
    checked with ``utils.verify_table``, and every accepted region becomes a
    ``Table`` whose outline is drawn onto the warped image.

    Side effects:
        Writes ``processing_data/detected_lines.jpg`` (the line mask) and,
        when at least one table was found,
        ``processing_data/table_boundaries.jpg`` (the warped image with all
        table rectangles drawn).

    Args:
        path: Passed unchanged to ``scan_img``.

    Returns:
        None in the code shown here; the collected ``tables`` list is not
        returned.
    """
    # scanning the image, applying perspective warping
    # and adaptive thresholding
    filtered, warped = scan_img(path)

    # line isolation
    SCALE = 23

    # isolate horizontal and vertical lines using morphological operations
    horizontal = filtered.copy()
    vertical = filtered.copy()

    # a structuring element needs at least one pixel per side, so the
    # length is clamped for images smaller than SCALE pixels
    horizontal_size = max(1, int(horizontal.shape[1] / SCALE))
    horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                    (horizontal_size, 1))
    utils.isolate_lines(horizontal, horizontal_structure)

    vertical_size = max(1, int(vertical.shape[0] / SCALE))
    vertical_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                  (1, vertical_size))
    utils.isolate_lines(vertical, vertical_structure)

    # TABLE EXTRACTION
    # create an image mask with just the horizontal and vertical lines;
    # bitwise OR instead of ``+`` because uint8 addition wraps around on
    # overflow where the two line images overlap
    mask = cv.bitwise_or(horizontal, vertical)
    cv.imwrite("processing_data/detected_lines.jpg", mask)

    # index -2 is the contour list for both the two-value and the
    # three-value (OpenCV 3.x) return of ``findContours``
    contours = cv.findContours(mask, cv.RETR_EXTERNAL,
                               cv.CHAIN_APPROX_SIMPLE)[-2]

    # find intersections between the lines
    # to determine if the intersections are table joints.
    intersections = cv.bitwise_and(horizontal, vertical)

    # get tables from the images
    tables = []  # list of tables
    for contour in contours:
        # verify that region of interest is a table
        (rect, table_joints) = utils.verify_table(contour, intersections)
        # identity checks: ``== None`` is ambiguous (or raises) if either
        # value is a NumPy array
        if rect is None or table_joints is None:
            continue

        # create a new instance of a table
        table = Table(rect[0], rect[1], rect[2], rect[3])

        # one coordinate pair per joint: the first point of each joint
        joint_coords = np.asarray([joint[0][0] for joint in table_joints])

        # lexsort uses the LAST key as the primary one: order by column 1
        # first, then by column 0 within ties
        sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
        joint_coords = joint_coords[sorted_indices]

        # store joint coordinates in the table instance
        table.set_joints(joint_coords)

        tables.append(table)

        # outline the table on the warped image (green, 1 px)
        cv.rectangle(warped, (table.x, table.y),
                     (table.x + table.w, table.y + table.h), (0, 255, 0), 1, 8,
                     0)

    # Written once after all rectangles are drawn; previously the same file
    # was rewritten after every table. Still skipped when no table is found.
    if tables:
        cv.imwrite("processing_data/table_boundaries.jpg", warped)
Exemplo n.º 5
0
def find_table(image):
    """Find the tables in an image and return them as ``Table`` objects.

    The image is converted to grayscale when it is a colour image,
    binarised with an adaptive threshold, and reduced to its horizontal and
    vertical lines. Contours of the combined line mask that
    ``utils.verify_table`` accepts become ``Table`` instances carrying
    their sorted joint coordinates.

    Args:
        image: Image array. A three-dimensional array is converted with
            ``cv.COLOR_BGR2GRAY``; any other array is used as-is and is
            assumed to be single-channel already.

    Returns:
        List of ``Table`` instances; empty when no table is found.
    """
    # A colour image has three array dimensions (rows, columns, channels).
    COLOUR_NDIM = 3
    if len(image.shape) == COLOUR_NDIM:
        grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    else:
        # Already single-channel. Without this branch ``grayscale`` was
        # never assigned and the thresholding below raised.
        grayscale = image

    # =====================================================
    # IMAGE FILTERING (using adaptive thresholding)
    # =====================================================
    MAX_THRESHOLD_VALUE = 255
    BLOCK_SIZE = 15
    THRESHOLD_CONSTANT = 0

    # Threshold the inverted image so that dark lines become foreground.
    filtered = cv.adaptiveThreshold(~grayscale, MAX_THRESHOLD_VALUE,
                                    cv.ADAPTIVE_THRESH_MEAN_C,
                                    cv.THRESH_BINARY, BLOCK_SIZE,
                                    THRESHOLD_CONSTANT)

    # =====================================================
    # LINE ISOLATION
    # =====================================================
    SCALE = 15

    # Isolate horizontal and vertical lines using morphological operations
    horizontal = filtered.copy()
    vertical = filtered.copy()

    # A structuring element needs at least one pixel per side, so the
    # length is clamped for images smaller than SCALE pixels.
    horizontal_size = max(1, int(horizontal.shape[1] / SCALE))
    horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                    (horizontal_size, 1))
    utils.isolate_lines(horizontal, horizontal_structure)

    vertical_size = max(1, int(vertical.shape[0] / SCALE))
    vertical_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                  (1, vertical_size))
    utils.isolate_lines(vertical, vertical_structure)

    # =====================================================
    # TABLE EXTRACTION
    # =====================================================
    # Create an image mask with just the horizontal and vertical lines.
    # Bitwise OR instead of ``+``: uint8 addition wraps around on overflow
    # (255 + 255 -> 254) where the two line images overlap.
    mask = cv.bitwise_or(horizontal, vertical)

    # Index -2 is the contour list for both the two-value and the
    # three-value (OpenCV 3.x) return of ``findContours``.
    contours = cv.findContours(mask, cv.RETR_EXTERNAL,
                               cv.CHAIN_APPROX_SIMPLE)[-2]

    # Find intersections between the lines
    # to determine if the intersections are table joints.
    intersections = cv.bitwise_and(horizontal, vertical)

    # Get tables from the images
    tables = []  # list of tables
    for contour in contours:
        # Verify that region of interest is a table
        (rect, table_joints) = utils.verify_table(contour, intersections)
        # Identity checks: ``== None`` is ambiguous (or raises) if either
        # value is a NumPy array.
        if rect is None or table_joints is None:
            continue

        # Create a new instance of a table
        table = Table(rect[0], rect[1], rect[2], rect[3])

        # One coordinate pair per joint: the first point of each joint.
        joint_coords = np.asarray([joint[0][0] for joint in table_joints])

        # lexsort uses the LAST key as the primary one: order by column 1
        # first, then by column 0 within ties.
        sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
        joint_coords = joint_coords[sorted_indices]

        # Store joint coordinates in the table instance
        table.set_joints(joint_coords)

        tables.append(table)

    # An empty list already signals "no tables found".
    return tables