def store_iam_original(pair):
    """Save a copy of the page image with every component highlighted.

    Each component returned by ``pair.get_components()`` is painted as a
    translucent blue rectangle on top of the PNG at ``pair.img``. The result
    is written to ``<pair.base>/original/<pair.index>.png``.
    """
    file_name = str(pair.index) + ".png"
    target_path = os.path.join(pair.base, "original", file_name)
    create_folders(target_path)

    surface = cairo.ImageSurface.create_from_png(pair.img)
    ctx = cairo.Context(surface)

    for box in pair.get_components():
        left, top = box["x"], box["y"]
        box_width, box_height = box["width"], box["height"]
        ctx.rectangle(left, top, box_width, box_height)
        # Filled one rectangle at a time, so overlapping boxes blend twice.
        ctx.set_source_rgba(0, 0, 1, 0.3)
        ctx.fill()

    surface.write_to_png(target_path)
def get_custom_transformation(pair, trim_out_of_bounds=True):
    """Convert a pair's ground truth into per-line step lists and draw them.

    For every ground-truth line, each entry of ``line["lf"]`` is turned into
    a ``Step`` via ``Step.get_from``. The steps are drawn onto a grayscale
    copy of ``pair.img`` (first step, last step and intermediate steps each
    get their own color) and the picture is written to
    ``<pair.base>/custom_transformation/visualization/<stem>.png``.

    Args:
        pair: Object exposing ``base``, ``img``, ``transformation_xml_path``,
            ``stem()`` and ``extract_ground_truth()``.
        trim_out_of_bounds: When true (the default), steps for which
            ``step.is_within(image)`` is false are dropped. When false, all
            steps are kept.

    Returns:
        A dict with the keys ``origin``, ``basename``, ``lines``, ``image``
        and ``xml``. ``lines`` holds one ``{"steps": [...], "text": ...}``
        entry per ground-truth line, where each step is the ``__dict__`` of
        the corresponding ``Step``. A line whose steps were all trimmed is
        recorded with an empty ``steps`` list.
    """
    visualization_path = os.path.join(
        pair.base,
        "custom_transformation",
        "visualization",
        pair.stem(extension=".png"),
    )
    create_folders(visualization_path)

    image = convert_to_grayscale(cairo.ImageSurface.create_from_png(pair.img))
    context = cairo.Context(image)

    lines = []
    for line in pair.extract_ground_truth():
        steps = [Step.get_from(lf) for lf in line["lf"]]
        if trim_out_of_bounds:
            steps = [step for step in steps if step.is_within(image)]

        last_position = len(steps) - 1
        for position, step in enumerate(steps):
            # The first-step check wins when a line has a single step.
            if position == 0:
                color = (255, 0, 0)
            elif position == last_position:
                color = (0, 0, 255)
            else:
                color = (0, 255, 0)
            draw_custom_lf(context, step, color)

        lines.append({
            "steps": [step.__dict__ for step in steps],
            "text": line["ground_truth"],
        })

    image.write_to_png(visualization_path)

    return {
        "origin": pair.base,
        "basename": pair.stem(),
        "lines": lines,
        "image": pair.img,
        "xml": pair.transformation_xml_path,
    }
def store_page_visualization(pair):
    """Draw each ground-truth line's bounding polygon and baseline on the page.

    The annotated image is written to
    ``<pair.base>/pages/visualization/<stem>.png``. Two colors are taken from
    ``next_color()`` per line: one for the polygon outline (alpha 0.2) and
    one for the baseline (alpha 0.7). Lines with an empty polygon or an empty
    baseline simply skip that part of the drawing.
    """
    visualization_path = os.path.join(
        pair.base, "pages", "visualization", pair.stem(extension=".png"))
    create_folders(visualization_path)

    image = cairo.ImageSurface.create_from_png(pair.img)
    context = cairo.Context(image)

    for line in pair.extract_ground_truth():
        # Both colors are fetched up front so the color sequence does not
        # depend on whether a shape turns out to be empty.
        polygon_color = next_color()
        baseline_color = next_color()

        # Draw bounding polygon
        polygon = line["bounding_poly"]
        if len(polygon) > 0:
            context.move_to(polygon[-1][0], polygon[-1][1])
            for coordinate in polygon:
                context.line_to(*coordinate)
            context.line_to(*(polygon[0]))
            context.set_operator(cairo.Operator.MULTIPLY)
            context.set_line_width(3)
            context.set_source_rgba(*polygon_color, 0.2)
            context.stroke()

        # Draw baseline
        baseline = line["baseline"]
        if len(baseline) > 0:
            context.move_to(*baseline[0])
            for coordinate in baseline[1:]:
                context.line_to(*coordinate)
            context.set_operator(cairo.Operator.MULTIPLY)
            context.set_line_width(3)
            context.set_source_rgba(*baseline_color, 0.7)
            context.stroke()

    image.write_to_png(visualization_path)
def paint_model_run(model_path, img_path, dataloader, destination="screenshots/run.png"):
    """Run the line outliner on one image and paint its predictions.

    Every dataloader item whose ``"img_path"`` equals ``img_path`` is fed to a
    ``LineOutlinerTsa`` loaded from ``model_path``. Starting from the second
    ground-truth step, up to 60 further steps are predicted one at a time.
    The predicted upper, base and lower points are drawn as polylines, the
    area between consecutive steps is shaded according to the step's
    confidence value, and the first ground-truth step is drawn in green.

    Args:
        model_path: Path handed to ``LineOutlinerTsa(path=...)``.
        img_path: Image to paint on; also used to select dataloader items.
        dataloader: Iterable whose items are sequences; the first element of
            each item is a dict with ``"img_path"``, ``"img"`` and ``"steps"``.
        destination: Where the painted image is saved.

    Requires CUDA: the model and the image tensor are moved to the GPU.
    """
    dtype = torch.cuda.FloatTensor

    painter = Painter(path=img_path)

    lol = LineOutlinerTsa(path=model_path)
    lol.cuda()

    for batch in dataloader:
        x = batch[0]

        belongs = img_path == x["img_path"]
        if not belongs:
            continue

        img = x['img'].type(dtype)[None, ...]
        ground_truth = x["steps"]

        predictions = [ground_truth[1]]

        # Pure inference: no autograd graph is needed, so do not build one.
        with torch.no_grad():
            for _ in range(60):
                torch.cuda.empty_cache()
                predicted_step = lol(img, torch.stack(predictions),
                                     sol_index=len(predictions) - 1, disturb_sol=False)
                if predicted_step is None:
                    break
                # Stop when the upper and base points drift too far apart.
                if torch.dist(predicted_step[0], predicted_step[1]).item() > 160:
                    break
                # NOTE(review): step[4][0] is treated as a confidence below;
                # presumably a high value signals end of line -- confirm.
                if predicted_step[4][0].item() > 0.85:
                    break
                predictions.append(predicted_step.clone().detach().cpu())

        upper_points = [Point(step[0][0].item(), step[0][1].item()) for step in predictions]
        base_points = [Point(step[1][0].item(), step[1][1].item()) for step in predictions]
        lower_points = [Point(step[2][0].item(), step[2][1].item()) for step in predictions]
        confidences = [step[4][0].item() for step in predictions]

        painter.draw_line(lower_points, line_width=2, color=(1, 0, 1, 0.5))
        painter.draw_line(base_points, line_width=2, color=(0, 0, 1, 0.5))
        painter.draw_line(upper_points, line_width=2, color=(1, 0, 1, 0.5))

        # Shade the quadrilateral between each pair of consecutive steps.
        for i in range(len(confidences) - 1):
            confidence = confidences[i]
            painter.draw_area(
                [upper_points[i], upper_points[i + 1], lower_points[i + 1], lower_points[i]],
                fill_color=(confidence, 1 - confidence, 0, 0.05 + confidence))

        # Highlight the first ground-truth step (start of line) in green.
        first_step = ground_truth[0]
        sol_upper = Point(first_step[0][0].item(), first_step[0][1].item())
        sol_lower = Point(first_step[1][0].item(), first_step[1][1].item())

        painter.draw_line([sol_lower, sol_upper], color=(0, 1, 0, 1), line_width=4)
        painter.draw_point(sol_lower, color=(0, 1, 0, 1), radius=4)
        painter.draw_point(sol_upper, color=(0, 1, 0, 1), radius=4)

    # NOTE(review): the original indentation was lost; saving once after the
    # loop is assumed -- confirm it was not meant to run per matching item.
    create_folders(destination)
    painter.save(destination)
# NOTE(review): these options declare no ``type=``, so values given on the
# command line arrive as ``str`` while the defaults stay ``int``; cast at the
# point of use.
parser.add_argument("--patch_ratio", default=3)  # iam -> 5
parser.add_argument("--patch_size", default=64)  # How big are the patches in pixels
parser.add_argument(
    "--min_height", default=8
)  # Min line size in pixels, to prevent going to 0 during training

# Training techniques
parser.add_argument("--name", default="training")
parser.add_argument("--output", default="snapshots/lol")

args = parser.parse_args()

### SAVE ARGUMENTS
# Persist the parsed arguments next to the run's other outputs.
args_filename = os.path.join(args.output, args.name, 'args.json')
create_folders(args_filename)
with open(args_filename, 'w') as fp:
    json.dump(args.__dict__, fp, indent=4)

training_set_list_path = os.path.join(args.dataset_folder, "training.json")
training_set_list = load_file_list_direct(training_set_list_path)
train_dataset = LolDataset(training_set_list, augmentation=True)
train_dataloader = DataLoader(train_dataset,
                              batch_size=1,
                              shuffle=True,
                              num_workers=0,
                              collate_fn=dataset.collate)
# Cast both operands: either may be a string when passed on the command line.
batches_per_epoch = int(int(args.images_per_epoch) / int(args.batch_size))
train_dataloader = DatasetWrapper(train_dataloader, batches_per_epoch)

test_set_list_path = os.path.join(args.dataset_folder, "testing.json")
def save(self, path="test.png"):
    """Write this object's surface to ``path`` as a PNG file.

    ``create_folders(path)`` is called first; presumably it ensures the
    parent directories exist (helper defined elsewhere).
    """
    create_folders(path)
    write_png = self.surface.write_to_png
    write_png(path)
page_index + ".xml") assert os.path.exists(img_path) and os.path.isfile(img_path) assert os.path.exists(xml_path) and os.path.isfile(xml_path) # index, base_folder, img_filename, xml_filename pair = ImageXmlPair(page_index, database_original_folder, img_path, xml_path) pairs.append(pair) # For each pair # Create a folder with its name # Extract lines # Create JSON with line information # Add to data json data for pair in pairs: folder_path = os.path.join(target_folder, "pages", "data", pair.index) create_folders(os.path.join(folder_path, "something.txt")) image_json_path = os.path.join(target_folder, "pages", "data", pair.index, pair.index + ".json") image_json = [] pair.set_height_threshold(0.1) image_data = run_transformation_approach(pair, alpha=0.0025) steps = to_steps({pair.index: image_data}, [pair]) image_steps = steps["images"][0] dataset_json_data.append([image_json_path, image_steps["filename"]]) image = TrainingImage(image_steps) for line in image.lines: LineAugmentation.normalize(line) LineAugmentation.extend_backwards(line) LineAugmentation.extend(line, by=6,
def to_steps(data, pairs, visualize=True):
    """Walk along each line's baseline and sample upper/base/lower points.

    For every page in ``data`` (matched to ``pairs`` by position) and every
    line on it, the function starts at ``baseline[0]`` and repeatedly
    intersects a perpendicular through the current point with the line's
    hull. The two ends of each intersection become the step's upper and lower
    points, and the point where that segment crosses the baseline becomes its
    base point (falling back to the lower point). The walk then advances
    along the baseline by the measured height, with a minimum of 20.

    Per page, the collected steps are saved via ``save_to_json`` to
    ``<pair.base>/json/<index>.json``. When ``visualize`` is true the
    annotated image is also written to ``<pair.base>/stepped/<index>.png``.

    Args:
        data: Mapping ``page_index -> {line_index -> line}`` where each line
            has ``"baseline"``, ``"hull"`` and ``"text"``.
        pairs: Sequence of pairs, in the same order as ``data`` iterates.
        visualize: Whether to shade the components and save the PNG.

    Returns:
        ``{"images": [...]}`` with one entry per page holding ``index``,
        ``filename`` and ``lines``; each line holds ``text``, ``steps`` and
        ``index``.
    """
    height_threshold = 20  # minimum step height, also the minimum walk distance
    skip_distance = 4  # how far to move when a perpendicular misses the hull

    result = {
        "images": []
    }

    for i, page_index in enumerate(data):
        pair = pairs[i]
        print("Stepping pair #" + str(pair.index))

        image_data = {
            "index": pair.index,
            "filename": pair.img,
            "lines": []
        }

        image = cairo.ImageSurface.create_from_png(pair.img)
        context = cairo.Context(image)

        if visualize:
            for component in pair.get_components():
                context.rectangle(component["x"], component["y"],
                                  component["width"], component["height"])
                context.set_source_rgba(0, 0, 1, 0.1)
                context.fill()

        for line_index in data[page_index]:
            line = data[page_index][line_index]
            baseline = line["baseline"]
            hull = line["hull"]
            line_data = {
                "text": line["text"],
                "steps": []
            }

            line_slope = slope(baseline)
            start_point = baseline[0]
            distance_walked = 0
            total_distance = distance(baseline[0], baseline[1])

            context.set_operator(cairo.OPERATOR_MULTIPLY)
            context.set_line_width(5)

            upper_points = []
            lower_points = []
            baseline_points = []

            # Distance skipped while still looking for the first hull hit.
            # Without this bound the search never ends when the hull is
            # never hit.
            lead_in_skipped = 0

            while distance_walked < total_distance:
                intersecting_line = perpendicular(start_point, baseline)
                intersection = intersecting_line.intersection(hull)

                if isinstance(intersection, (MultiPoint, LineString)) \
                        and len(intersection.bounds) == 4:
                    bounds = intersection.bounds
                    upper_point = [bounds[0], bounds[1]]
                    lower_point = [bounds[2], bounds[3]]
                elif isinstance(intersection, Point):
                    print("Intersection was point, moving forward")
                    start_point = walk(start_point, line_slope, skip_distance)
                    continue
                else:
                    if distance_walked == 0 and lead_in_skipped < total_distance:
                        # Nothing hit yet: keep searching for the hull start.
                        start_point = walk(start_point, line_slope, skip_distance)
                        lead_in_skipped += skip_distance
                    else:
                        print("No intersection, skipping line " + str(line_index) + " of " + str(
                            pair.index) + " after walking" + str(distance_walked))
                        distance_walked = total_distance
                    continue

                upper_points.append(upper_point)
                lower_points.append(lower_point)

                baseline_intersection = LineString(
                    [Point(upper_point[0], upper_point[1]), Point(lower_point[0], lower_point[1])]) \
                    .intersection(LineString(to_points(baseline)))
                if isinstance(baseline_intersection, Point) and len(baseline_intersection.bounds) > 1:
                    baseline_point = [baseline_intersection.bounds[0], baseline_intersection.bounds[1]]
                else:
                    baseline_point = lower_point
                baseline_points.append(baseline_point)

                height = distance(upper_point, baseline_point)
                if height < height_threshold and distance_walked == 0:
                    # The first point doesn't have a height
                    # NOTE(review): the adjusted point is only used for the
                    # drawing below; upper_points keeps the original value.
                    baseline_start = to_points(baseline)[0]
                    angle = angle_between_points(baseline_start, to_points(baseline)[1])
                    new_upper_point = get_new_point(baseline_start, angle - 90, height_threshold)
                    upper_point = [new_upper_point.x, new_upper_point.y]

                # Guarantee forward progress on very thin parts of the hull.
                height = max(height, height_threshold)

                context.set_source_rgba(1, 0, 1, 1)
                context.move_to(upper_point[0], upper_point[1])
                context.line_to(lower_point[0], lower_point[1])
                context.stroke()

                context.set_source_rgba(0, 0, 1, 0.1)
                context.move_to(start_point[0], start_point[1])
                start_point = walk(start_point, line_slope, height)
                distance_walked += height
                context.line_to(start_point[0], start_point[1])
                context.stroke()

            # Connect the sampled points into three polylines.
            for polyline in [baseline_points, upper_points, lower_points]:
                if len(polyline) == 0:
                    continue
                context.set_source_rgba(1, 0, 1, 0.3)
                context.move_to(polyline[0][0], polyline[0][1])
                for point in polyline:
                    context.line_to(point[0], point[1])
                context.stroke()

            for upper, lower, base in zip(upper_points, lower_points, baseline_points):
                line_data["steps"].append({
                    "upper_point": upper,
                    "lower_point": lower,
                    "base_point": base,
                })

            line_data["index"] = line_index
            image_data["lines"].append(line_data)

        result["images"].append(image_data)
        save_path = os.path.join(pair.base, "json", str(image_data["index"]) + ".json")
        save_to_json(image_data, save_path)

        if visualize:
            visualization_path = os.path.join(pair.base, "stepped", str(pair.index) + ".png")
            create_folders(visualization_path)
            image.write_to_png(visualization_path)

    return result
def run_transformation_approach(pair, alpha=0.004, visualization_path=None):
    """Compute a hull around each line's components and pair it with its baseline.

    The corner points of all components are grouped by the ``"index"`` that
    ``pointsOf`` reports for them. ``alpha_shape`` is then run on each group
    and the exterior of the resulting shape is kept as that line's hull.
    Components, baselines and hulls are drawn onto a copy of ``pair.img``.

    Args:
        pair: Object exposing ``index``, ``img``, ``get_components()`` and
            ``get_transformation()``.
        alpha: Forwarded to ``alpha_shape``.
        visualization_path: Optional directory; when given, the annotated
            image is written there as ``<pair.index>.png``.

    Returns:
        A dict mapping line index to ``{"index", "hull", "baseline", "text"}``.
        Lines whose shape is a ``MultiPolygon``, or has no usable exterior
        ring, are left out.
    """
    print("Transforming pair #" + str(pair.index))
    image = cairo.ImageSurface.create_from_png(pair.img)
    context = cairo.Context(image)

    # Single pass: shade each component and collect its corners per line.
    line_components = {}
    for component in pair.get_components():
        context.rectangle(
            component["x"],
            component["y"],
            component["width"],
            component["height"],
        )
        context.set_source_rgba(0, 0, 1, 0.1)
        context.fill()

        data = pointsOf(component)
        line_components.setdefault(data["index"], []).extend((
            data["top_left"],
            data["top_right"],
            data["bottom_right"],
            data["bottom_left"],
        ))

    context.set_operator(cairo.OPERATOR_MULTIPLY)
    context.set_line_width(5)
    context.set_source_rgba(1, 0, 0, 1)

    transformation = pair.get_transformation()
    for line in transformation["lines"]:
        baseline = line["baseline"]
        start = baseline[0]
        end = baseline[1]
        context.move_to(start[0], start[1])
        context.line_to(end[0], end[1])
        context.stroke()

    used_data = {}
    for line_index, points in line_components.items():
        coords = [Point(p[0], p[1]) for p in points]
        concave_hull, _ = alpha_shape(coords, alpha=alpha)

        if isinstance(concave_hull, MultiPolygon):
            continue

        # alpha_shape is defined elsewhere; skip any result that has no
        # exterior ring to draw instead of failing on it.
        exterior = getattr(concave_hull, "exterior", None)
        if exterior is None or len(exterior.coords) == 0:
            continue

        context.set_operator(cairo.OPERATOR_MULTIPLY)
        context.set_line_width(3)
        context.set_source_rgba(0, 1, 0.3, 1)
        context.move_to(exterior.coords[0][0], exterior.coords[0][1])
        for point in exterior.coords:
            context.line_to(point[0], point[1])
        context.stroke()

        used_data[line_index] = {
            "index": line_index,
            "hull": exterior,
            "baseline": transformation["lines"][line_index]["baseline"],
            "text": transformation["lines"][line_index]["gt"],
        }

    if visualization_path is not None:
        visualization_path = os.path.join(visualization_path, str(pair.index) + ".png")
        create_folders(visualization_path)
        image.write_to_png(visualization_path)

    return used_data