示例#1
0
def store_iam_original(pair):
    """Save a copy of the page image with every component box shaded.

    The PNG at ``pair.img`` is loaded, each entry returned by
    ``pair.get_components()`` is filled as a translucent blue rectangle
    (using its ``x``/``y``/``width``/``height`` keys), and the result is
    written to ``<pair.base>/original/<pair.index>.png``.
    """
    file_name = str(pair.index) + ".png"
    output_png = os.path.join(pair.base, "original", file_name)
    create_folders(output_png)

    surface = cairo.ImageSurface.create_from_png(pair.img)
    ctx = cairo.Context(surface)

    for box in pair.get_components():
        ctx.set_source_rgba(0, 0, 1, 0.3)
        ctx.rectangle(box["x"], box["y"], box["width"], box["height"])
        ctx.fill()

    surface.write_to_png(output_png)
示例#2
0
def get_custom_transformation(pair, trim_out_of_bounds=True):
    """Collect the steps of every ground-truth line and render them to a PNG.

    The page image is loaded, converted with ``convert_to_grayscale`` and used
    as a drawing surface. For each line returned by
    ``pair.extract_ground_truth()`` a ``Step`` is built from every entry of
    ``line["lf"]`` and drawn with ``draw_custom_lf``: the first step of a line
    with colour ``(255, 0, 0)``, the last with ``(0, 0, 255)`` and all others
    with ``(0, 255, 0)``. The rendering is written to
    ``<pair.base>/custom_transformation/visualization/<stem>.png``.

    Args:
        pair: Image/XML pair exposing ``base``, ``img``, ``stem()``,
            ``extract_ground_truth()`` and ``transformation_xml_path``.
        trim_out_of_bounds: When true (the default), steps for which
            ``step.is_within(image)`` is false are dropped. When false, every
            step is kept. Previously this flag was ignored and steps were
            always trimmed.

    Returns:
        A dict with ``origin``, ``basename``, ``lines``, ``image`` and ``xml``.
        Each entry of ``lines`` holds the ``__dict__`` of every kept step under
        ``"steps"`` and the line's ground truth under ``"text"``. A line whose
        steps were all dropped is kept with an empty ``"steps"`` list, so the
        entries stay aligned with the ground-truth lines.
    """
    visualization_path = os.path.join(pair.base, "custom_transformation",
                                      "visualization",
                                      pair.stem(extension=".png"))
    create_folders(visualization_path)

    image = cairo.ImageSurface.create_from_png(pair.img)
    image = convert_to_grayscale(image)
    context = cairo.Context(image)

    lines = []
    for line in pair.extract_ground_truth():
        steps = [Step.get_from(lf) for lf in line["lf"]]
        if trim_out_of_bounds:
            steps = [step for step in steps if step.is_within(image)]

        last_index = len(steps) - 1
        for k, step in enumerate(steps):
            # The first-step check wins when a line has a single step.
            if k == 0:
                color = (255, 0, 0)
            elif k == last_index:
                color = (0, 0, 255)
            else:
                color = (0, 255, 0)
            draw_custom_lf(context, step, color)

        lines.append({
            "steps": [step.__dict__ for step in steps],
            "text": line["ground_truth"],
        })

    image.write_to_png(visualization_path)

    return {
        "origin": pair.base,
        "basename": pair.stem(),
        "lines": lines,
        "image": pair.img,
        "xml": pair.transformation_xml_path,
    }
示例#3
0
def store_page_visualization(pair):
    """Draw every ground-truth line of a page onto its image and save it.

    For each line from ``pair.extract_ground_truth()`` the bounding polygon is
    stroked faintly in one colour taken from ``next_color()``, then every
    baseline segment is stroked on its own with a fresh ``next_color()``.
    The result is written to ``<pair.base>/pages/visualization/<stem>.png``.
    """
    output_png = os.path.join(pair.base, "pages", "visualization",
                              pair.stem(extension=".png"))
    create_folders(output_png)

    surface = cairo.ImageSurface.create_from_png(pair.img)
    ctx = cairo.Context(surface)

    def stroke_with(rgb, alpha):
        # Shared stroke settings: multiply blend, 3 unit wide line.
        ctx.set_operator(cairo.Operator.MULTIPLY)
        ctx.set_line_width(3)
        ctx.set_source_rgba(*rgb, alpha)
        ctx.stroke()

    for entry in pair.extract_ground_truth():
        outline_color = next_color()

        # Bounding polygon: start at the last vertex, visit all vertices,
        # then return to the first one.
        outline = entry["bounding_poly"]
        closing_vertex = outline[-1]
        ctx.move_to(closing_vertex[0], closing_vertex[1])
        for vertex in outline:
            ctx.line_to(*vertex)
        ctx.line_to(*outline[0])
        stroke_with(outline_color, 0.2)

        # Baseline: one stroke (and one colour) per segment.
        baseline = entry["baseline"]
        ctx.move_to(*baseline[0])
        for vertex in baseline[1:]:
            ctx.line_to(*vertex)
            stroke_with(next_color(), 0.7)
            ctx.move_to(*vertex)

    surface.write_to_png(output_png)
示例#4
0
def paint_model_run(model_path, img_path, dataloader, destination="screenshots/run.png",
                    max_steps=60, max_height=160, stop_confidence=0.85):
    """Run the line outliner on the lines of one image and paint the result.

    Every batch of ``dataloader`` whose first sample has ``img_path`` equal to
    the given one is processed: starting from ``steps[1]`` of the sample, the
    model is asked repeatedly for the next step until a stop condition is met.
    The predicted upper/base/lower polylines, a confidence-coloured area
    between consecutive steps, and the sample's ``steps[0]`` (drawn in green
    as the start marker) are painted on top of the image, which is finally
    saved to ``destination``.

    Requires CUDA: the model and the image tensor are moved to the GPU.

    Args:
        model_path: Path passed to ``LineOutlinerTsa``.
        img_path: Image to paint on; also selects the matching samples.
        dataloader: Iterable of batches; ``batch[0]`` must be a mapping with
            ``"img_path"``, ``"img"`` and ``"steps"``.
        destination: Output PNG path.
        max_steps: Maximum number of model calls per line.
        max_height: Prediction stops when the distance between a predicted
            step's first two points (used below as upper and base point)
            exceeds this value.
        stop_confidence: Prediction stops when ``predicted_step[4][0]``
            exceeds this value.
    """
    dtype = torch.cuda.FloatTensor

    painter = Painter(path=img_path)

    lol = LineOutlinerTsa(path=model_path)
    lol.cuda()

    for batch in dataloader:
        sample = batch[0]
        if img_path != sample["img_path"]:
            continue

        img = sample['img'].type(dtype)[None, ...]
        ground_truth = sample["steps"]

        predictions = [ground_truth[1]]

        # Pure inference: no autograd graph is needed, and skipping it keeps
        # GPU memory usage down across the repeated forward passes.
        with torch.no_grad():
            for _ in range(max_steps):
                torch.cuda.empty_cache()
                predicted_step = lol(img,
                                     torch.stack(predictions),
                                     sol_index=len(predictions) - 1,
                                     disturb_sol=False)
                if predicted_step is None:
                    break
                if torch.dist(predicted_step[0], predicted_step[1]).item() > max_height:
                    break
                if predicted_step[4][0].item() > stop_confidence:
                    break
                predictions.append(predicted_step.clone().detach().cpu())

        upper_points = [Point(step[0][0].item(), step[0][1].item()) for step in predictions]
        baseline_points = [Point(step[1][0].item(), step[1][1].item()) for step in predictions]
        lower_points = [Point(step[2][0].item(), step[2][1].item()) for step in predictions]
        confidences = [step[4][0].item() for step in predictions]

        painter.draw_line(lower_points, line_width=2, color=(1, 0, 1, 0.5))
        painter.draw_line(baseline_points, line_width=2, color=(0, 0, 1, 0.5))
        painter.draw_line(upper_points, line_width=2, color=(1, 0, 1, 0.5))

        # Shade the quad between each pair of consecutive steps; the colour
        # moves from green towards red, and gets more opaque, as the
        # confidence of the left-hand step rises.
        for i, confidence in enumerate(confidences[:-1]):
            painter.draw_area([upper_points[i], upper_points[i + 1], lower_points[i + 1], lower_points[i]],
                              fill_color=(confidence, 1 - confidence, 0, 0.05 + confidence))

        # Mark the sample's first step in green.
        start_step = ground_truth[0]
        sol_upper = Point(start_step[0][0].item(), start_step[0][1].item())
        sol_lower = Point(start_step[1][0].item(), start_step[1][1].item())

        painter.draw_line([sol_lower, sol_upper], color=(0, 1, 0, 1), line_width=4)
        painter.draw_point(sol_lower, color=(0, 1, 0, 1), radius=4)
        painter.draw_point(sol_upper, color=(0, 1, 0, 1), radius=4)

    create_folders(destination)
    painter.save(destination)
示例#5
0
# Patch geometry options. None of these declare ``type=``, so a value given on
# the command line arrives as a string while the defaults below stay ints.
# NOTE(review): consider adding an explicit ``type=`` -- confirm how consumers
# of these values convert them.
parser.add_argument("--patch_ratio", default=3)  # iam -> 5
parser.add_argument("--patch_size",
                    default=64)  # How big are the patches in pixels
parser.add_argument(
    "--min_height", default=8
)  # Min line size in pixels, to prevent going to 0 during training

# Run name and output location; together they form <output>/<name>/.
parser.add_argument("--name", default="training")
parser.add_argument("--output", default="snapshots/lol")

args = parser.parse_args()

### SAVE ARGUMENTS
# Persist the parsed arguments next to the run's outputs as args.json.
args_filename = os.path.join(args.output, args.name, 'args.json')
create_folders(args_filename)
with open(args_filename, 'w') as fp:
    json.dump(args.__dict__, fp, indent=4)

# Training data: file list read from <dataset_folder>/training.json,
# wrapped in a dataset with augmentation enabled.
training_set_list_path = os.path.join(args.dataset_folder, "training.json")
training_set_list = load_file_list_direct(training_set_list_path)
train_dataset = LolDataset(training_set_list, augmentation=True)
# NOTE(review): the loader is built with batch_size=1 while the epoch length
# below is derived from args.batch_size -- confirm this is intentional.
train_dataloader = DataLoader(train_dataset,
                              batch_size=1,
                              shuffle=True,
                              num_workers=0,
                              collate_fn=dataset.collate)
# NOTE(review): images_per_epoch is wrapped in int() but batch_size is not; if
# --batch_size is declared without ``type=int`` (its declaration is not visible
# here) a command-line value would make this division fail -- verify.
batches_per_epoch = int(int(args.images_per_epoch) / args.batch_size)
train_dataloader = DatasetWrapper(train_dataloader, batches_per_epoch)

test_set_list_path = os.path.join(args.dataset_folder, "testing.json")
示例#6
0
 def save(self, path="test.png"):
     """Write this object's surface to ``path`` as a PNG.

     ``create_folders(path)`` is called first (presumably to make sure the
     target directory exists -- confirm against its definition).
     """
     target = path
     create_folders(target)
     surface = self.surface
     surface.write_to_png(target)
示例#7
0
                                page_index + ".xml")
        assert os.path.exists(img_path) and os.path.isfile(img_path)
        assert os.path.exists(xml_path) and os.path.isfile(xml_path)
        # index, base_folder, img_filename, xml_filename
        pair = ImageXmlPair(page_index, database_original_folder, img_path,
                            xml_path)
        pairs.append(pair)

    # For each pair
    # Create a folder with its name
    # Extract lines
    # Create JSON with line information
    # Add to data json data
    for pair in pairs:
        folder_path = os.path.join(target_folder, "pages", "data", pair.index)
        create_folders(os.path.join(folder_path, "something.txt"))
        image_json_path = os.path.join(target_folder, "pages", "data",
                                       pair.index, pair.index + ".json")
        image_json = []
        pair.set_height_threshold(0.1)
        image_data = run_transformation_approach(pair, alpha=0.0025)
        steps = to_steps({pair.index: image_data}, [pair])
        image_steps = steps["images"][0]
        dataset_json_data.append([image_json_path, image_steps["filename"]])
        image = TrainingImage(image_steps)

        for line in image.lines:
            LineAugmentation.normalize(line)
            LineAugmentation.extend_backwards(line)
            LineAugmentation.extend(line,
                                    by=6,
示例#8
0
def to_steps(data, pairs, visualize=True):
    """Walk along each line's baseline and record upper/lower/base points.

    For every page in ``data`` the matching pair's image is loaded and, for
    every line of that page, a probe perpendicular to the baseline is moved
    from ``baseline[0]`` along the baseline's slope. Wherever the probe crosses
    the line's hull, an upper point, a lower point and a base point are
    recorded as one step. The step size is the measured height (at least 20).

    Args:
        data: Mapping of page index -> mapping of line index -> dict with the
            keys ``"baseline"``, ``"hull"`` and ``"text"``.
        pairs: Sequence of image/XML pairs, matched to the pages of ``data``
            by position in iteration order (not by key).
        visualize: When true, component boxes are shaded and the annotated
            image is saved to ``<pair.base>/stepped/<pair.index>.png``.

    Returns:
        ``{"images": [...]}`` with one dict per page holding ``index``,
        ``filename`` and ``lines``. Each line holds ``text``, ``index`` and
        ``steps``; each step has ``upper_point``, ``lower_point`` and
        ``base_point``.

    Side effects:
        Prints progress messages and writes each page's dict to
        ``<pair.base>/json/<index>.json`` via ``save_to_json`` regardless of
        ``visualize``.
    """
    result = {
        "images": []
    }

    for i, page_index in enumerate(data):
        # Pages and pairs are aligned by position.
        pair = pairs[i]

        print("Stepping pair #" + str(pair.index))

        image_data = {
            "index": pair.index,
            "filename": pair.img,
            "lines": []
        }

        # The surface is drawn on even when visualize is False; it is only
        # written to disk when visualize is True (see the end of the loop).
        image = cairo.ImageSurface.create_from_png(pair.img)
        context = cairo.Context(image)

        if visualize:
            # Shade every component box in faint blue.
            for component in pair.get_components():
                context.rectangle(component["x"], component["y"], component["width"], component["height"], )
                context.set_source_rgba(0, 0, 1, 0.1)
                context.fill()

        for line_index in data[page_index]:

            line = data[page_index][line_index]
            baseline = line["baseline"]
            hull = line["hull"]
            line_data = {
                "text": line["text"],
                "steps": []
            }

            line_slope = slope(baseline)
            start_point = baseline[0]
            distance_walked = 0
            # Only the first two baseline points define the length to walk.
            total_distance = distance(baseline[0], baseline[1])

            # Minimum step height; also the minimum walking increment.
            height_threshold = 20
            context.set_operator(cairo.OPERATOR_MULTIPLY)
            context.set_line_width(5)
            upper_points = []
            lower_points = []
            baseline_points = []

            # NOTE(review): the Point branch and the "distance_walked == 0"
            # branch below move start_point without increasing
            # distance_walked, so termination relies on a later probe hitting
            # the hull -- confirm this cannot loop forever on a hull that the
            # probe never crosses.
            while distance_walked < total_distance:
                intersecting_line = perpendicular(start_point, baseline)
                intersection = intersecting_line.intersection(hull)

                upper_point = None
                lower_point = None

                # NOTE(review): presumably these are shapely geometries, in
                # which case bounds is (minx, miny, maxx, maxy) and the two
                # points below are bounding-box corners -- TODO confirm.
                if isinstance(intersection, MultiPoint) and len(intersection.bounds) == 4:
                    upper_point = [intersection.bounds[0], intersection.bounds[1]]
                    lower_point = [intersection.bounds[2], intersection.bounds[3]]
                elif isinstance(intersection, LineString) and len(intersection.bounds) == 4:
                    upper_point = [intersection.bounds[0], intersection.bounds[1]]
                    lower_point = [intersection.bounds[2], intersection.bounds[3]]
                elif isinstance(intersection, Point):
                    # A single touching point gives no height; nudge forward.
                    print("Intersection was point, moving forward")
                    start_point = walk(start_point, line_slope, 4)
                    continue
                else:
                    if distance_walked == 0:
                        # Hull not reached yet: keep nudging forward.
                        start_point = walk(start_point, line_slope, 4)
                    else:
                        # Hull lost after at least one step: end this line.
                        print("No intersection, skipping line " + str(line_index) + " of " + str(
                            pair.index) + " after walking" + str(distance_walked))
                        distance_walked = total_distance
                    continue

                if upper_point is not None and lower_point is not None:

                    upper_points.append(upper_point)
                    lower_points.append(lower_point)

                    # Where the upper-lower segment crosses the baseline.
                    baseline_intersection = LineString(
                        [Point(upper_point[0], upper_point[1]), Point(lower_point[0], lower_point[1])]) \
                        .intersection(LineString(to_points(baseline)))

                    baseline_point = None
                    if isinstance(baseline_intersection, Point) and len(baseline_intersection.bounds) > 1:
                        baseline_point = [baseline_intersection.bounds[0], baseline_intersection.bounds[1]]
                    else:
                        # No single crossing point: fall back to the lower point.
                        baseline_point = lower_point
                    baseline_points.append(baseline_point)

                    height = distance(upper_point, baseline_point)

                    # NOTE(review): upper_point is replaced here after it was
                    # already appended to upper_points above, so the new value
                    # only affects the marker drawn below, not the stored step
                    # -- confirm whether the stored point should change too.
                    if height < height_threshold and distance_walked == 0:
                        # The first point doesnt have a height
                        if distance_walked == 0:
                            angle = angle_between_points(to_points(baseline)[0], to_points(baseline)[1])
                            new_upper_point = get_new_point(to_points(baseline)[0], angle - 90, height_threshold)
                            upper_point = [new_upper_point.x, new_upper_point.y]

                    # Never advance by less than the threshold.
                    if height < height_threshold:
                        height = height_threshold

                    # Draw the upper-lower marker for this step.
                    context.set_source_rgba(1, 0, 1, 1)
                    context.move_to(upper_point[0], upper_point[1])
                    context.line_to(lower_point[0], lower_point[1])
                    context.stroke()

                    # Advance along the baseline by the step height and draw
                    # the walked segment.
                    context.set_source_rgba(0, 0, 1, 0.1)
                    context.move_to(start_point[0], start_point[1])
                    start_point = walk(start_point, line_slope, height)
                    distance_walked += height
                    context.line_to(start_point[0], start_point[1])
                    context.stroke()

                else:
                    distance_walked = total_distance

            # Trace the collected base, upper and lower points as polylines.
            for pc in [baseline_points, upper_points, lower_points]:
                if len(pc) == 0:
                    continue
                context.set_source_rgba(1, 0, 1, 0.3)
                context.move_to(pc[0][0], pc[0][1])
                for bp in pc:
                    context.line_to(bp[0], bp[1])
                context.stroke()

            # This reuses the outer loop's name `i`; `pair` was already looked
            # up at the top of the page iteration and enumerate rebinds `i` on
            # the next page.
            for i in range(len(baseline_points)):
                line_data["steps"].append({
                    "upper_point": upper_points[i],
                    "lower_point": lower_points[i],
                    "base_point": baseline_points[i],
                })

            line_data["index"] = line_index
            image_data["lines"].append(line_data)

        result["images"].append(image_data)
        # The per-page JSON is always written, independent of `visualize`.
        save_path = os.path.join(pair.base, "json", str(image_data["index"]) + ".json")
        save_to_json(image_data, save_path)
        if visualize:
            visualization_path = os.path.join(pair.base, "stepped", str(pair.index) + ".png")
            create_folders(visualization_path)
            image.write_to_png(visualization_path)

    return result
示例#9
0
def run_transformation_approach(pair, alpha=0.004, visualization_path=None):
    """Build a concave hull around the components of every line of a page.

    Components from ``pair.get_components()`` are grouped by the ``"index"``
    reported by ``pointsOf``; the four corner points of each component feed
    ``alpha_shape`` to obtain one hull per line. Component boxes, the
    baselines from ``pair.get_transformation()`` and the hull outlines are
    drawn on the page image along the way.

    Args:
        pair: Image/XML pair exposing ``index``, ``img``, ``get_components()``
            and ``get_transformation()``.
        alpha: Alpha value forwarded to ``alpha_shape``.
        visualization_path: Optional directory. When given, the annotated
            image is written to ``<visualization_path>/<pair.index>.png``.

    Returns:
        A dict keyed by line index. Each value holds ``index``, ``hull`` (the
        hull's exterior), ``baseline`` and ``text`` (the line's ``"gt"``).
        Lines whose hull comes back as a ``MultiPolygon`` are skipped and are
        absent from the result.
    """
    print("Transforming pair #" + str(pair.index))

    image = cairo.ImageSurface.create_from_png(pair.img)
    context = cairo.Context(image)

    # One pass over the components: shade each box and collect its corner
    # points under the line it belongs to.
    line_components = {}
    for component in pair.get_components():
        context.rectangle(
            component["x"],
            component["y"],
            component["width"],
            component["height"],
        )
        context.set_source_rgba(0, 0, 1, 0.1)
        context.fill()

        data = pointsOf(component)
        line_components.setdefault(data["index"], []).extend((
            data["top_left"],
            data["top_right"],
            data["bottom_right"],
            data["bottom_left"],
        ))

    context.set_operator(cairo.OPERATOR_MULTIPLY)
    context.set_line_width(5)
    context.set_source_rgba(1, 0, 0, 1)

    transformation_lines = pair.get_transformation()["lines"]

    # Draw each baseline (its first two points) in red.
    for line in transformation_lines:
        baseline = line["baseline"]
        start = baseline[0]
        end = baseline[1]
        context.move_to(start[0], start[1])
        context.line_to(end[0], end[1])
        context.stroke()

    used_data = {}
    for line_index, points in line_components.items():
        coords = [Point(p[0], p[1]) for p in points]
        concave_hull, _ = alpha_shape(coords, alpha=alpha)

        # A hull split into several polygons has no single exterior to use.
        if isinstance(concave_hull, MultiPolygon):
            continue

        exterior = concave_hull.exterior
        outline = exterior.coords

        context.set_operator(cairo.OPERATOR_MULTIPLY)
        context.set_line_width(3)
        context.set_source_rgba(0, 1, 0.3, 1)
        context.move_to(outline[0][0], outline[0][1])
        for point in outline:
            context.line_to(point[0], point[1])
        context.stroke()

        source_line = transformation_lines[line_index]
        used_data[line_index] = {
            "index": line_index,
            "hull": exterior,
            "baseline": source_line["baseline"],
            "text": source_line["gt"],
        }

    if visualization_path is not None:
        visualization_path = os.path.join(visualization_path,
                                          str(pair.index) + ".png")
        create_folders(visualization_path)
        image.write_to_png(visualization_path)
    return used_data