def extract_table(table_body, __line__, lines=None):
    """Build table-cell boxes from the intersections of ruling lines.

    Every vertical line is intersected with every horizontal line. Each pair
    of consecutive intersections on one vertical line is cached as a segment,
    and the segment becomes a finished box once a later vertical line
    provides an intersection at the same ``y`` as each of its two end points.

    Side effects: prints status messages, draws the intersections on a copy
    of ``table_body``, shows that copy in an OpenCV window named
    ``"intersection"`` and blocks in ``cv2.waitKey(0)`` until a key is
    pressed.

    Args:
        table_body: Table image. It must provide ``.copy()`` and be accepted
            by ``cv2.line`` (presumably a NumPy image array -- confirm
            against callers).
        __line__: When equal to ``1`` the lines are obtained from
            ``line_detection(table_body)``; for any other value the
            caller-supplied ``lines`` are used.
        lines: ``(horizontal_lines, vertical_lines)`` pair, each a sequence
            of ``(x1, y1, x2, y2)`` segments. Required whenever ``__line__``
            is not ``1``.

    Returns:
        ``None`` if either the horizontal or the vertical line list is
        empty. Otherwise a list of 8-element lists
        ``[x, y, x, y, x, y, x, y]``: the first two points are consecutive
        intersections on one vertical line, the last two are the matching
        intersections found on a later vertical line.
    """
    # Placeholder stored in slots 4-7 of a cached segment until the matching
    # corner on a later vertical line has been found.
    unset = 9999

    if __line__ == 1:
        temp_lines_hor, temp_lines_ver = line_detection(table_body)
    else:
        temp_lines_hor, temp_lines_ver = lines

    if len(temp_lines_hor) == 0 or len(temp_lines_ver) == 0:
        print("Either Horizontal Or Vertical Lines Not Detected")
        return None

    table = table_body.copy()
    points = []
    print("[Table status] : Processing table with lines")

    # One entry of `points` per vertical line: its intersections with the
    # horizontal lines, in the order the horizontal lines are given.
    for x1, y1, x2, y2 in temp_lines_ver:
        point = []
        for x3, y3, x4, y4 in temp_lines_hor:
            try:
                x, y = line_intersection(x1, y1, x2, y2, x3, y3, x4, y4)
            except Exception:
                # Pairs for which no intersection can be unpacked are
                # skipped. `Exception` (not a bare except) so that Ctrl-C
                # and SystemExit still propagate.
                continue
            point.append([x, y])
        points.append(point)

    # Debug visualisation of the detected intersections.
    # NOTE(review): waitKey(0) blocks until a key is pressed; kept because
    # existing callers may rely on the preview window.
    for point in points:
        for x, y in point:
            cv2.line(table, (x, y), (x, y), (0, 0, 255), 8)
    cv2.imshow("intersection", table)
    cv2.waitKey(0)

    box = []
    lastCache = []
    # Creating bounding boxes of cells from the points detected.
    # This is still under work and might fail on some images.
    for i, row in enumerate(points):
        currentVala = []
        for col, nextcol in zip(row, row[1:]):
            segment = [
                col[0], col[1], nextcol[0], nextcol[1],
                unset, unset, unset, unset,
            ]
            if i == 0:
                # The first vertical line only seeds the cache.
                lastCache.append(segment)
                continue
            currentVala.append(segment)

            # Matching: try to close cached segments with this line's points.
            flag = 1
            index = []
            for k, last in enumerate(lastCache):
                if col[1] == last[1] and last[4] == unset:
                    last[4] = col[0]
                    last[5] = col[1]
                    if last[4] != unset and last[6] != unset:
                        box.append(last)
                        index.append(k)
                        flag = 1

                if nextcol[1] == last[3] and last[6] == unset:
                    last[6] = nextcol[0]
                    last[7] = nextcol[1]
                    if last[4] != unset and last[6] != unset:
                        box.append(last)
                        index.append(k)
                        flag = 1

                if last[4] == unset or last[6] == unset:
                    flag = 0

            # `index` is in ascending order; remove from the back so that
            # earlier removals do not shift the positions still to be
            # removed (ascending pops dropped the wrong entries or raised
            # IndexError when several segments completed at once).
            for k in reversed(index):
                lastCache.pop(k)

            # Transferring: carry still-open segments over to the next line.
            if flag == 0:
                for last in lastCache:
                    if last[4] == unset or last[6] == unset:
                        currentVala.append(last)

        if i != 0:
            lastCache = currentVala

    return box
import numpy as np
from Functions.line_detection import line_detection

# Batch preview script: for every file in `base_path`, detect ruling lines,
# add the four image borders as extra lines, keep only the horizontal lines
# that span most of the image width, and draw those on a copy of the image.
base_path = '/Volumes/my_disk/company/sensedeal/217_PycharmProject/bbtv/PaddleOCR-1.0-2021/doc/my_imgs_11/'
image_name_list = os.listdir(base_path)
for image_name in image_name_list:
    image = cv2.imread(os.path.join(base_path, image_name))
    if image is None:
        # cv2.imread returns None for entries it cannot decode (hidden
        # files, sub-directories, non-images); without this guard the shape
        # unpacking below raises and aborts the whole batch.
        print("Skipping unreadable image:", image_name)
        continue

    img_h, img_w, _ = np.shape(image)
    # NOTE(review): resize_h is not used in the visible part of the script,
    # and the else branch assigns the image *width* -- confirm whether
    # img_h was intended.
    if img_h > 1000:
        resize_h = 1000
    else:
        resize_h = img_w

    temp_lines_hor, temp_lines_ver = line_detection(image)

    # Treat the image borders as additional table lines.
    temp_lines_hor.append([0, 0, img_w, 0])
    temp_lines_hor.append([0, img_h, img_w, img_h])
    temp_lines_ver.append([0, 0, 0, img_h])
    temp_lines_ver.append([img_w, 0, img_w, img_h])

    show_img = image.copy()

    # Keep only horizontal lines longer than 61.8% of the image width.
    temp = []
    for line in temp_lines_hor:
        x1, y1, x2, y2 = line
        if abs(x1 - x2) / img_w > 0.618:
            temp.append([x1, y1, x2, y2])
    temp_lines_hor = temp

    for i in temp_lines_hor:
        cv2.line(show_img, (i[0], i[1]), (i[2], i[3]), (0, 0, 255), 2)