def test_matrix(self):
    """Check selected neighbour-matrix entries for a fixed four-box layout.

    Four rectangles are placed on a blank 1000x1000 array, a
    ``NeighborGraphBuilder`` is created from them, and eight entries of the
    value returned by ``get_neighbor_matrix()`` are compared against
    hard-coded expectations.

    NOTE(review): other callers in this file unpack ``get_neighbor_matrix()``
    into two values, whereas this test indexes the result directly --
    confirm which version of the API this test targets.
    """
    boxes = [
        {'x': 500, 'width': 50, 'y': 500, 'height': 100},
        {'x': 300, 'width': 100, 'y': 500, 'height': 100},
        {'x': 500, 'width': 100, 'y': 300, 'height': 100},
        {'x': 500, 'width': 100, 'y': 700, 'height': 100},
    ]
    blank_page = np.zeros((1000, 1000))
    neighbors = NeighborGraphBuilder(boxes, blank_page).get_neighbor_matrix()

    # (row, column, expected value), checked in this order.
    expected_entries = (
        (0, 0, 1),
        (0, 1, 2),
        (0, 3, 3),
        (0, 2, -1),
        (1, 0, -1),
        (1, 1, -1),
        (1, 2, 0),
        (1, 3, -1),
    )
    for row, column, expected in expected_entries:
        assert (neighbors[row, column] == expected)
def dump_doc(self, all_tokens, all_tokens_rects, image, file_name):
    """Assemble per-token features for one page and pickle them.

    For every token this records a binary class (0 when ``image`` is zero
    at the centre of the token rectangle, otherwise 1), the rectangle
    normalised by the image size, and a 300-element embedding obtained from
    ``self.glove_reader`` (all -1 when the reader returns ``None``).  The
    neighbour graph and the size-normalised neighbour distances come from
    ``NeighborGraphBuilder``.

    Args:
        all_tokens: Sequence of tokens; its length fixes the token count.
        all_tokens_rects: Sequence of dicts with ``x``, ``y``, ``width`` and
            ``height`` keys, indexed in parallel with ``all_tokens``.
        image: Two-dimensional array indexed as ``image[row, column]``.
        file_name: Path of the pickle file to write.
    """
    token_count = len(all_tokens)
    img_height, img_width = np.shape(image)

    classes = np.zeros(token_count)
    rect_matrix = np.zeros((token_count, 4))
    embeddings_matrix = np.zeros((token_count, 300))

    for idx, token in enumerate(all_tokens):
        box = all_tokens_rects[idx]
        centre_row = int(box['y'] + box['height'] / 2)
        centre_col = int(box['x'] + box['width'] / 2)
        classes[idx] = 0 if image[centre_row, centre_col] == 0 else 1

        rect_matrix[idx] = (
            box['x'] / img_width,
            box['y'] / img_height,
            box['width'] / img_width,
            box['height'] / img_height,
        )

        vector = self.glove_reader.get_vector(token)
        if vector is None:
            # Tokens without an embedding get a vector of -1s.
            vector = np.ones((300)) * (-1)
        embeddings_matrix[idx] = vector

    builder = NeighborGraphBuilder(all_tokens_rects, image)
    neighbor_graph, neighbor_distance_matrix = builder.get_neighbor_matrix()

    # Columns 0 and 2 are divided by the width, columns 1 and 3 by the height.
    for column, scale in enumerate((img_width, img_height, img_width, img_height)):
        neighbor_distance_matrix[:, column] = neighbor_distance_matrix[:, column] / scale

    document = DocumentFeatures(embeddings_matrix, rect_matrix,
                                neighbor_distance_matrix, neighbor_graph,
                                classes)
    with open(file_name, 'wb') as handle:
        pickle.dump(document, handle, pickle.HIGHEST_PROTOCOL)
def see_table(self, table, increment):
    """Label one annotated table and pass its token relations to ``dump_table``.

    A three-channel integer image is painted from the table's ``Row``,
    ``Column`` and ``Cell`` children (channel 0 = row id, 1 = column id,
    2 = cell id).  Tokens whose centre has a non-zero row id are kept.  For
    those tokens the method records, per graph neighbour (columns 0..3 of
    the neighbour matrix) and per token pair, whether both carry the same
    row / column / cell id.  Everything, together with the feature map that
    ``ConvolutionalAutoencoder`` produces for the table crop, is handed to
    ``self.dump_table``; the label image is also written as ``visual.png``.

    Args:
        table: XML element providing ``attrib`` (``x0``, ``y0``, ``x1``,
            ``y1``) and ``findall``.
        increment: Integer appended to ``self.sorted_path`` to form the
            output directory name.
    """
    print("Converting doc", self.png_path)

    def corners(element):
        # Integer (x0, y0, x1, y1) of an XML element, parsed in that order.
        box = element.attrib
        return int(box['x0']), int(box['y0']), int(box['x1']), int(box['y1'])

    def centre(rect):
        # (column, row) of the rectangle centre, truncated to int.
        return (int(rect['x'] + rect['width'] / 2),
                int(rect['y'] + rect['height'] / 2))

    tx1, ty1, tx2, ty2 = corners(table)

    autoencoder = ConvolutionalAutoencoder()
    autoencoder.prepare_for_manual_testing()
    table_crop = self.image[ty1:ty2 + 1, tx1:tx2 + 1]
    spatial_features = autoencoder.get_feature_map(table_crop).astype(
        np.float64)

    out_dir = self.sorted_path + "-%d" % increment
    if not dont_output and not os.path.exists(out_dir):
        os.mkdir(out_dir)

    label_image = np.zeros((self.rows, self.cols, 3), dtype=np.int32)

    # Channel 0: each Row element closes a horizontal band that starts just
    # below the previous one.
    row_id = 0
    band_top = ty1
    for row_xml in table.findall('Row'):
        left, top, right, _ = corners(row_xml)
        row_id += 1
        label_image[band_top:top + 1, left:right + 1, 0] = row_id
        band_top = top + 1
    # NOTE(review): the remaining strip reuses the id of the last band --
    # confirm that this is intended.
    label_image[band_top:ty2, tx1:tx2 + 1, 0] = row_id

    # Channel 1: the same scheme along x for Column elements.
    col_id = 0
    band_left = tx1
    for col_xml in table.findall('Column'):
        left, top, _, bottom = corners(col_xml)
        col_id += 1
        label_image[top:bottom + 1, band_left:left + 1, 1] = col_id
        band_left = left + 1
    label_image[ty1:ty2, band_left:tx2, 1] = col_id

    # Channel 2: one id per Cell element that is not flagged dontCare.
    cell_id = 0
    for cell_xml in table.findall('Cell'):
        if cell_xml.attrib['dontCare'] == 'true':
            continue
        left, top, right, bottom = corners(cell_xml)
        cell_id += 1
        label_image[top:bottom + 1, left:right + 1, 2] = cell_id

    show_1 = ((label_image * 100) % 256).astype(np.uint8)
    if show:
        # Interactive preview of the label channels is currently disabled.
        pass

    # Keep only tokens whose centre lies on a pixel with a non-zero row id,
    # remembering the three ids found at that pixel.
    all_tokens = []
    all_tokens_rects = []
    centre_ids = []
    for idx, token in enumerate(self.all_tokens):
        rect = self.all_tokens_rects[idx]
        col, row = centre(rect)
        ids = label_image[row, col]
        if ids[0] == 0:
            continue
        all_tokens.append(token)
        all_tokens_rects.append(rect)
        centre_ids.append(ids)

    N = len(all_tokens)
    labels = np.array(centre_ids, dtype=np.int32).reshape(N, 3)

    neighbors_same_row = np.zeros((N, 4))
    neighbors_same_col = np.zeros((N, 4))
    neighbors_same_cell = np.zeros((N, 4))

    graph_builder = NeighborGraphBuilder(all_tokens_rects,
                                         label_image[:, :, 0])
    M, D = graph_builder.get_neighbor_matrix()

    # Flag, per neighbour slot, whether token and neighbour share an id.
    # A neighbour index of -1 leaves the flags at 0.
    flag_tables = (neighbors_same_row, neighbors_same_col,
                   neighbors_same_cell)
    for i in range(N):
        for direction in range(4):
            j = int(M[i, direction])
            if j == -1:
                continue
            for channel, flags in enumerate(flag_tables):
                if labels[i, channel] == labels[j, channel]:
                    flags[i, direction] = 1

    def pairwise_equal(channel):
        # N x N matrix holding 1.0 where two tokens have the same id.
        ids = labels[:, channel]
        return (ids[:, None] == ids[None, :]).astype(np.float64)

    row_share_matrix = pairwise_equal(0)
    col_share_matrix = pairwise_equal(1)
    cell_share_matrix = pairwise_equal(2)

    self.dump_table(all_tokens, all_tokens_rects, M, D, row_share_matrix,
                    col_share_matrix, cell_share_matrix, neighbors_same_row,
                    neighbors_same_col, neighbors_same_cell, show_1,
                    spatial_features,
                    os.path.join(out_dir, '__dump__.pickle'))
    cv2.imwrite(os.path.join(out_dir, 'visual.png'), show_1)
def dump_doc(self, all_tokens, all_tokens_rects, spatial_features, file_name):
    """Assemble table-detection features for one page and pickle them.

    Per token the method stores: a binary class (0 when
    ``self.image_tables`` is zero at the token centre, else 1), the
    rectangle normalised by the size of ``self.image``, the
    ``spatial_features`` vector at the token's normalised centre, a
    300-element embedding from ``self.glove_reader`` (all -1 when the reader
    returns ``None``), and four ``inside_same_table`` flags.  A flag is 1
    only when the token lies on a non-zero table id and the neighbour in
    that slot exists and lies on a different id; otherwise it is 0.  A debug
    image with one rectangle per token is written to ``visual.png`` inside
    ``self.sorted_path``.

    Args:
        all_tokens: Sequence of tokens; its length fixes the token count.
        all_tokens_rects: Sequence of dicts with ``x``, ``y``, ``width`` and
            ``height`` keys, indexed in parallel with ``all_tokens``.
        spatial_features: Array unpacked as (height, width, depth).
        file_name: Path of the pickle file to write.
    """
    token_count = len(all_tokens)
    img_height, img_width, _ = np.shape(self.image)
    feat_height, feat_width, feat_depth = np.shape(spatial_features)

    def table_id_at(rect):
        # Value of self.image_tables at the centre of the rectangle.
        row = int(rect['y'] + rect['height'] / 2)
        col = int(rect['x'] + rect['width'] / 2)
        return self.image_tables[row, col]

    classes = np.zeros(token_count)
    inside_same_table = np.zeros((token_count, 4))
    rect_matrix = np.zeros((token_count, 4))
    embeddings_matrix = np.zeros((token_count, 300))
    conv_features = np.zeros((token_count, feat_depth))

    graph_builder = NeighborGraphBuilder(all_tokens_rects, self.image_tables)

    if not dont_output and not os.path.exists(self.sorted_path):
        os.mkdir(self.sorted_path)

    neighbor_graph, neighbor_distance_matrix = graph_builder.get_neighbor_matrix()
    # Columns 0 and 2 are divided by the width, columns 1 and 3 by the height.
    for column, scale in enumerate((img_width, img_height, img_width, img_height)):
        neighbor_distance_matrix[:, column] = neighbor_distance_matrix[:, column] / scale

    draw_image = np.copy(self.image)
    for i in range(token_count):
        rect = all_tokens_rects[i]
        own_table = table_id_at(rect)

        flags = []
        for direction in range(4):
            neighbor = int(neighbor_graph[i, direction])
            # An index of -1 selects the last rectangle here; the explicit
            # -1 check below forces the flag to 0 in that case anyway.
            neighbor_rect = all_tokens_rects[neighbor]
            if own_table == 0:
                flags.append(0)
            elif table_id_at(neighbor_rect) == own_table or neighbor == -1:
                flags.append(0)
            else:
                flags.append(1)
        inside_same_table[i] = flags

        if any(flags):
            color = (0, 255, 0)
        elif own_table == 0:
            color = (0, 0, 255)
        else:
            color = (255, 0, 0)

        top_left = (int(rect['x']), int(rect['y']))
        bottom_right = (int(rect['x'] + rect['width']),
                        int(rect['y'] + rect['height']))
        cv2.rectangle(draw_image, top_left, bottom_right, color, 3)

    draw_path = os.path.join(self.sorted_path, 'visual.png')
    print(draw_path)
    cv2.imwrite(draw_path, draw_image)

    for i, token in enumerate(all_tokens):
        rect = all_tokens_rects[i]
        classes[i] = 0 if table_id_at(rect) == 0 else 1

        rect_matrix[i] = (
            rect['x'] / img_width,
            rect['y'] / img_height,
            rect['width'] / img_width,
            rect['height'] / img_height,
        )

        # Sample the feature map at the normalised centre of the token.
        normalised = rect_matrix[i]
        feat_x = int((normalised[0] + normalised[2] / 2) * feat_width)
        feat_y = int((normalised[1] + normalised[3] / 2) * feat_height)
        assert feat_x < feat_width and feat_y < feat_height
        conv_features[i] = spatial_features[feat_y, feat_x]

        vector = self.glove_reader.get_vector(token)
        if vector is None:
            # Tokens without an embedding get a vector of -1s.
            vector = np.ones((300)) * (-1)
        embeddings_matrix[i] = vector

    document = TableDetectDocument(embeddings_matrix, rect_matrix,
                                   neighbor_distance_matrix, neighbor_graph,
                                   classes, conv_features, inside_same_table)
    with open(file_name, 'wb') as handle:
        pickle.dump(document, handle, pickle.HIGHEST_PROTOCOL)
def see_table(self, table, increment):
    """Rasterise one annotated table into 256x256 tensors and dump them.

    Steps:

    1. Paint a three-channel label image from the table's ``Row``,
       ``Column`` and ``Cell`` children (channel 0 = row id, 1 = column id,
       2 = cell id).
    2. Keep the tokens whose centre lies on a pixel with a non-zero row id;
       return without writing anything when none remain.
    3. For each kept token and each of its four graph neighbours (columns
       0..3 of the neighbour matrix), flag whether both centres carry the
       same cell id.
    4. Scatter, per token, a 308-element input vector (300 embedding values
       followed by 8 table-normalised position/distance values) and the four
       same-cell flags into 256x256 grids, addressed by the token's top-left
       corner relative to the table.
    5. Write debug images and a gzip-compressed pickle of a
       ``TableParseDocument`` into ``self.sorted_path + "-<increment>"``.

    Changes compared with the previous implementation: the gzip file is
    closed even when pickling fails; destination grid indices are clamped to
    the grid instead of wrapping around on negative values; the pairwise
    share matrices and row/column neighbour flags, which were computed but
    never used, are no longer built.

    Args:
        table: XML element providing ``attrib`` (``x0``, ``y0``, ``x1``,
            ``y1``) and ``findall``.
        increment: Integer appended to ``self.sorted_path`` to form the
            output directory name.
    """
    grid = 256

    print("Converting doc", self.png_path)

    def corners(element):
        # Integer (x0, y0, x1, y1) of an XML element, parsed in that order.
        box = element.attrib
        return int(box['x0']), int(box['y0']), int(box['x1']), int(box['y1'])

    def centre(rect):
        # (column, row) of the rectangle centre, truncated to int.
        return (int(rect['x'] + rect['width'] / 2),
                int(rect['y'] + rect['height'] / 2))

    def to_grid(offset, extent):
        # Map an offset inside the table onto a grid index.  A token whose
        # centre is inside the table can still start left of / above it,
        # which used to yield a negative index that silently wrapped to the
        # opposite edge; clamp to the valid range instead.
        return min(max(math.floor(float(grid) * offset / extent), 0), grid - 1)

    tx1, ty1, tx2, ty2 = corners(table)
    image_table_cropped = self.image[ty1:ty2 + 1, tx1:tx2 + 1]

    # _, _, 0 = row share
    # _, _, 1 = column share
    # _, _, 2 = cell share
    data_image = np.zeros((self.rows, self.cols, 3), dtype=np.int32)

    # Each Row element closes a horizontal band that starts just below the
    # previous one.
    row_id = 0
    band_top = ty1
    for row_xml in table.findall('Row'):
        x1, y1, x2, _ = corners(row_xml)
        row_id += 1
        data_image[band_top:y1 + 1, x1:x2 + 1, 0] = row_id
        band_top = y1 + 1
    # NOTE(review): the remaining strip reuses the id of the last band --
    # confirm that this is intended.
    data_image[band_top:ty2, tx1:tx2 + 1, 0] = row_id

    # Same scheme along x for Column elements.
    col_id = 0
    band_left = tx1
    for col_xml in table.findall('Column'):
        x1, y1, _, y2 = corners(col_xml)
        col_id += 1
        data_image[y1:y2 + 1, band_left:x1 + 1, 1] = col_id
        band_left = x1 + 1
    data_image[ty1:ty2, band_left:tx2, 1] = col_id

    # One id per Cell element that is not flagged dontCare.
    cell_id = 0
    for cell_xml in table.findall('Cell'):
        if cell_xml.attrib['dontCare'] == 'true':
            continue
        x1, y1, x2, y2 = corners(cell_xml)
        cell_id += 1
        data_image[y1:y2 + 1, x1:x2 + 1, 2] = cell_id

    show_1 = ((data_image * 100) % 256).astype(np.uint8)
    if show:
        # Interactive preview of the label channels is currently disabled.
        pass

    # Keep only tokens whose centre lies on a pixel with a non-zero row id,
    # remembering the cell id found at that pixel.
    all_tokens = []
    all_tokens_rects = []
    cell_ids = []
    for idx, token in enumerate(self.all_tokens):
        token_rect = self.all_tokens_rects[idx]
        col, row = centre(token_rect)
        if data_image[row, col, 0] == 0:
            continue
        all_tokens.append(token)
        all_tokens_rects.append(token_rect)
        cell_ids.append(data_image[row, col, 2])

    N = len(all_tokens)
    if N == 0:
        return  # A table without any words is of no use.

    graph_builder = NeighborGraphBuilder(all_tokens_rects,
                                         data_image[:, :, 0])
    # M holds neighbour indices (-1 = none), D the matching distances.
    M, D = graph_builder.get_neighbor_matrix()

    neighbors_same_cell = np.zeros((N, 4))
    for i in range(N):
        for direction in range(4):
            j = int(M[i, direction])
            if j != -1 and cell_ids[i] == cell_ids[j]:
                neighbors_same_cell[i, direction] = 1

    sorted_path_full = self.sorted_path + "-%d" % increment
    if not dont_output and not os.path.exists(sorted_path_full):
        os.mkdir(sorted_path_full)

    cv2.imwrite(os.path.join(sorted_path_full, 'visual.png'), show_1)

    # Input vectors placed at the tokens' grid coordinates.
    input_tensor = np.zeros((grid, grid, 308)).astype(np.float64)
    # Per neighbour slot: 1 when the neighbour shares the cell, else 0.
    output_tensor = np.zeros((grid, grid, 4)).astype(np.float64)
    # 1 at grid cells that received a token.
    output_tensor_word_mask = np.zeros((grid, grid)).astype(np.float64)
    output_tensor_zone_mask = np.ones((grid, grid), dtype=np.float32)

    table_width = tx2 - tx1
    table_height = ty2 - ty1

    rgb = np.zeros((grid, grid, 3))
    glove_not_found = 0.0
    for i in range(N):
        token_rect = all_tokens_rects[i]

        # Top-left corner of the token relative to the table.
        cx = token_rect['x'] - tx1
        cy = token_rect['y'] - ty1
        cw = token_rect['width']
        ch = token_rect['height']

        distances_vector = D[i]

        embedding = self.glove_reader.get_vector(all_tokens[i])
        if embedding is None:
            # Tokens without an embedding get a vector of -1s.
            embedding = np.ones((300)) * (-1)
            glove_not_found += 1

        positional = np.array([
            cx / table_width, cy / table_height,
            cw / table_width, ch / table_height,
            distances_vector[0] / table_width,
            distances_vector[1] / table_height,
            distances_vector[2] / table_width,
            distances_vector[3] / table_height,
        ])

        nx = to_grid(cx, table_width)
        ny = to_grid(cy, table_height)

        input_tensor[ny, nx] = np.concatenate((embedding, positional))
        output_tensor[ny, nx] = neighbors_same_cell[i]

        # Debug colour: (0, 0, 255) when the neighbour in slot 0 or 1 shares
        # the cell, (255, 255, 255) otherwise.
        if neighbors_same_cell[i, 0] == 1 or neighbors_same_cell[i, 1] == 1:
            rgb[ny, nx] = np.array([0, 0, 255])
        else:
            rgb[ny, nx] = np.array([255, 255, 255])

        output_tensor_word_mask[ny, nx] = 1

    if glove_not_found / N > 0.3:
        print("WARNING: GloVe not found ratio", glove_not_found / N)

    # Debug image of the zone mask.
    segmentation_visualize_path = os.path.join(sorted_path_full,
                                               'visual_segment.png')
    cv2.imwrite(segmentation_visualize_path,
                (output_tensor_zone_mask * 255).astype(np.uint8))

    # Debug images of the word mask, raw and multiplied by the zone mask.
    word_mask_path = os.path.join(sorted_path_full, 'visual_word_mask.png')
    masked_rgb = (rgb.transpose((2, 0, 1)) *
                  output_tensor_zone_mask).transpose(1, 2, 0)
    cv2.imwrite(word_mask_path, rgb.astype(np.uint8))
    word_mask_path_1 = os.path.join(sorted_path_full,
                                    'visual_word_mask_masked.png')
    cv2.imwrite(word_mask_path_1, masked_rgb.astype(np.uint8))
    cv2.imwrite(os.path.join(sorted_path_full, 'table_cropped.png'),
                image_table_cropped)

    # Dump the content to a gzip-compressed pickle file.
    dump_path = os.path.join(sorted_path_full, '__dump__.pklz')
    document = TableParseDocument(input_tensor, output_tensor,
                                  output_tensor_word_mask,
                                  output_tensor_zone_mask)
    with gzip.open(dump_path, 'wb') as f:
        pickle.dump(document, f)
def execute_tokens(self):
    """Build and dump the 256x256 zone-segmentation tensors for this page.

    For every token the four graph neighbours (columns 0..3 of the neighbour
    matrix) are compared with the token by looking up
    ``self.zone_segmentation`` at both rectangle centres.  Each slot gets 1
    when the zones match or when there is no neighbour (index -1), and 110
    when they differ.  Per token, a 308-element input vector (300 embedding
    values followed by 8 page-normalised position/distance values) and the
    four flags are then scattered into 256x256 grids addressed by the
    token's top-left corner.  Debug images and a gzip-compressed pickle of a
    ``ZoneSegmentDocument`` are written into ``self.sorted_path``.

    Fixes compared with the previous implementation:

    * The "no neighbour" branches for slots 1..3 wrote column 0, and the
      "different zone" branches for slots 2 and 3 wrote column 1, so flags
      of other directions were overwritten and some slots stayed 0.  Every
      direction now writes its own column.
    * The positional vector used ``cx / height`` and ``ch / width``; it now
      uses ``cy / height`` and ``ch / height``.
    * The gzip file is closed even when pickling fails.

    Side effects: ``self.zone_segmentation`` is binarised in place (every
    non-zero entry becomes 1) after the neighbour comparison.
    """
    grid = 256
    same_zone = 1
    different_zone = 110  # marker value also used in the word mask below

    def centre(rect):
        # (column, row) of the rectangle centre, truncated to int.
        return (int(rect['x'] + rect['width'] / 2),
                int(rect['y'] + rect['height'] / 2))

    # Local neighbours of each token, one per column of M.
    graph_builder = NeighborGraphBuilder(self.all_tokens_rects,
                                         self.image[:, :, 0])
    # M is the indices graph and D is the distance matrix.
    M, D = graph_builder.get_neighbor_matrix()

    N = len(self.all_tokens)

    neighbors_same_zone = np.zeros((N, 4))
    for i in range(N):
        col, row = centre(self.all_tokens_rects[i])
        own_zone = self.zone_segmentation[row, col]
        for direction in range(4):
            j = int(M[i, direction])
            if j == -1:
                # No neighbour in this slot: treated like "same zone".
                neighbors_same_zone[i, direction] = same_zone
                continue
            n_col, n_row = centre(self.all_tokens_rects[j])
            if own_zone == self.zone_segmentation[n_row, n_col]:
                neighbors_same_zone[i, direction] = same_zone
            else:
                neighbors_same_zone[i, direction] = different_zone

    # Input vectors placed at the tokens' grid coordinates.
    input_tensor = np.zeros((grid, grid, 308)).astype(np.float64)
    # Per neighbour slot: 1 for same zone / no neighbour, 110 for a
    # different zone; cells without a token stay 0.
    output_tensor = np.zeros((grid, grid, 4)).astype(np.float64)
    # 1 or 110 at grid cells that received a token.
    output_tensor_word_mask = np.zeros((grid, grid)).astype(np.float64)

    # Binarise the zone map in place; this must happen after the neighbour
    # comparison above, which relies on the distinct zone ids.
    self.zone_segmentation[self.zone_segmentation != 0] = 1
    output_tensor_zone_mask = cv2.resize(self.zone_segmentation,
                                         (grid, grid))

    for i in range(N):
        token_rect = self.all_tokens_rects[i]

        # Top-left corner and size of the token.
        cx = token_rect['x']
        cy = token_rect['y']
        cw = token_rect['width']
        ch = token_rect['height']

        distances_vector = D[i]

        embedding = self.glove_reader.get_vector(self.all_tokens[i])
        if embedding is None:
            # Tokens without an embedding get a vector of -1s.
            embedding = np.ones((300)) * (-1)

        positional = np.array([
            cx / self.width, cy / self.height,
            cw / self.width, ch / self.height,
            distances_vector[0] / self.width,
            distances_vector[1] / self.height,
            distances_vector[2] / self.width,
            distances_vector[3] / self.height,
        ])

        # Destination coordinates on the grid.
        nx = math.floor(float(grid) * cx / self.width)
        ny = math.floor(float(grid) * cy / self.height)

        input_tensor[ny, nx] = np.concatenate((embedding, positional))
        output_tensor[ny, nx] = neighbors_same_zone[i]

        if np.any(neighbors_same_zone[i] == different_zone):
            output_tensor_word_mask[ny, nx] = different_zone
        else:
            output_tensor_word_mask[ny, nx] = 1

    print(self.sorted_path)

    # Debug colours: (255, 255, 255) for mask value 1, (255, 0, 0) for 110.
    rgb = np.zeros((grid, grid, 3))
    rgb[output_tensor_word_mask == 1.0] = 255
    rgb[output_tensor_word_mask == 110.0] = (255, 0, 0)

    # Debug image of the zone mask.
    segmentation_visualize_path = os.path.join(self.sorted_path,
                                               'visual_segment.png')
    cv2.imwrite(segmentation_visualize_path,
                (output_tensor_zone_mask * 255).astype(np.uint8))

    # Debug images of the word mask, raw and multiplied by the zone mask.
    word_mask_path = os.path.join(self.sorted_path, 'visual_word_mask.png')
    masked_rgb = (rgb.transpose((2, 0, 1)) *
                  output_tensor_zone_mask).transpose(1, 2, 0)
    print(masked_rgb.shape)
    matplotlib.image.imsave(word_mask_path, rgb.astype(np.uint8))
    word_mask_path_1 = os.path.join(self.sorted_path,
                                    'visual_word_mask_masked.png')
    matplotlib.image.imsave(word_mask_path_1, masked_rgb.astype(np.uint8))

    # Dump the content to a gzip-compressed pickle file.
    dump_path = os.path.join(self.sorted_path, '__dump__.pklz')
    document = ZoneSegmentDocument(input_tensor, output_tensor,
                                   output_tensor_word_mask,
                                   output_tensor_zone_mask)
    with gzip.open(dump_path, 'wb') as f:
        pickle.dump(document, f)