Exemplo n.º 1
0
def read_bubble_array(fn, rect=(0.8, 0, 1, 0.7)):
    """Read the bubble array on every page of a document.

    Args:
        fn: Path of the document; opened with ``pdf.open_ensuring_pdf``.
        rect: Rectangle handed unchanged to ``page_marks.read_bubble_array``
            for each page (presumably relative page coordinates -- confirm
            against ``page_marks``).

    Returns:
        A tuple ``(fn, page_totals, page_letters)``. The last two items are
        tuples holding, page by page, the first and the second element of
        the pair returned by ``page_marks.read_bubble_array``. Both are
        empty tuples when the document has no pages.
    """
    doc = pdf.open_ensuring_pdf(fn)
    score_data = [page_marks.read_bubble_array(p, rect) for p in doc.pages()]

    if not score_data:
        # zip(*[]) yields nothing, so the two-name unpacking below would
        # raise ValueError for a document without pages.
        return (fn, (), ())

    page_totals, page_letters = zip(*score_data)
    return (fn, page_totals, page_letters)
Exemplo n.º 2
0
def paste_markrecorder(
    fn,
    output_path,
    bubble_path=page_marks.bubble_array_path,
    out_fn=None,
    rect=(0.87, 0.03, 0.98, 0.5),
):
    """Paste the document at *bubble_path* onto every page of *fn* and save it.

    Args:
        fn: Path of the input document.
        output_path: Directory for the result; passed to ``ensure_path``
            before anything is written.
        bubble_path: Path of the document that is pasted on each page.
        out_fn: Output file name; derived from *fn* with
            ``_extract_fn_from_path`` when ``None``.
        rect: Forwarded as ``relative_rect`` to
            ``pdf.paste_pdf_on_every_page``.

    Returns:
        The full path of the saved file.
    """
    ensure_path(output_path)
    if out_fn is None:
        out_fn = _extract_fn_from_path(fn)
    target = os.path.join(output_path, out_fn)

    overlay = pdf.open_ensuring_pdf(bubble_path)
    source_doc = pdf.open_ensuring_pdf(fn)
    stamped = pdf.paste_pdf_on_every_page(source_doc, overlay, relative_rect=rect)
    stamped.save(target)

    return target
Exemplo n.º 3
0
def test_open_ensuring_pdf(tmp_path, img_fn):
    """Saving what ``pdf.open_ensuring_pdf`` returns for *img_fn* must yield a PDF.

    The document is written to a uniquely named file inside *tmp_path* and
    checked with ``inspect.is_pdf``.
    """
    target = os.path.join(tmp_path, f"{uuid.uuid1()}.pdf")

    opened = pdf.open_ensuring_pdf(img_fn)
    opened.save(target)

    assert inspect.is_pdf(target)
Exemplo n.º 4
0
def add_mark_recorder(doc):
    """Paste ``other/mark_recorder0.pdf`` onto every page of *doc*.

    The return value of ``pdf.paste_pdf_on_every_page`` is discarded and the
    *doc* object that was passed in is returned.
    NOTE(review): this relies on the paste call modifying *doc* in place --
    confirm against ``pdf.paste_pdf_on_every_page``.
    """
    recorder_template = pdf.open_ensuring_pdf("other/mark_recorder0.pdf")
    placement = (.87, .03, .98, .5)
    pdf.paste_pdf_on_every_page(doc, recorder_template, relative_rect=placement)
    return doc
Exemplo n.º 5
0
def refit(fn, output_path, out_fn=None, rect=(0, 0, 0.85, 0.85)):
    """Run ``pdf.refit_pdf`` on the document at *fn* and save the result.

    Args:
        fn: Path of the input document.
        output_path: Directory for the result; passed to ``ensure_path``.
        out_fn: Output file name; derived from *fn* with
            ``_extract_fn_from_path`` when ``None``.
        rect: Forwarded as ``relative_paste_rect`` to ``pdf.refit_pdf``.

    Returns:
        The full path of the saved file.
    """
    ensure_path(output_path)
    if out_fn is None:
        out_fn = _extract_fn_from_path(fn)
    target = os.path.join(output_path, out_fn)

    original = pdf.open_ensuring_pdf(fn)
    pdf.refit_pdf(original, relative_paste_rect=rect).save(target)

    return target
Exemplo n.º 6
0
def worker(inputs_):
    """Refit one document, add its id and the mark recorder, then save it.

    Args:
        inputs_: A 4-item sequence ``(fn, dest, doc_id, var_id)``. The base
            name of *fn* is passed to ``add_doc_id`` and reused as the
            output file name inside *dest*.
    """
    fn, dest, doc_id, var_id = inputs_
    st_num = os.path.basename(fn)

    opened = pdf.open_ensuring_pdf(fn)
    refitted = refit_(opened)
    labelled = add_doc_id(refitted, st_num, doc_id, var_id)
    finished = add_mark_recorder(labelled)

    finished.save(os.path.join(dest, st_num))
Exemplo n.º 7
0
def write_marks(fn, output_path, out_fn=None, rect=(0.8, 0, 1, 0.7)):
    """Write each page's bubble-array total onto the page and save a copy.

    Args:
        fn: Path of the input document.
        output_path: Directory for the result; passed to ``ensure_path``.
        out_fn: Output file name; derived from *fn* with
            ``_extract_fn_from_path`` when ``None``.
        rect: Rectangle handed to ``page_marks.read_bubble_array``.

    Returns:
        The full path of the saved file.
    """
    ensure_path(output_path)
    if out_fn is None:
        out_fn = _extract_fn_from_path(fn)
    target = os.path.join(output_path, out_fn)

    total_box = (0.9, 0, 1, 0.1)
    document = pdf.open_ensuring_pdf(fn)
    for page in document.pages():
        # Only the first element of the returned pair is written out.
        total, _letters = page_marks.read_bubble_array(page, rect)
        pdf.place_text(page, str(total), relative_rect=total_box)

    document.save(target)
    return target
Exemplo n.º 8
0
def worker(input_):
    """Write the weighted bubble total on every page and save under *dest*.

    Args:
        input_: A pair ``(fn, dest)``; the result keeps the base name of
            *fn* and is saved inside *dest*.
    """
    fn, dest = input_
    base_name = os.path.basename(fn)
    text_box = (.9, 0, 1, .1)

    document = pdf.open_ensuring_pdf(fn)
    for page in document.pages():
        marked = bubbles.read_robust(page, (.8, 0, 1, .5))
        # Element-wise product with BUBBLEARRAY, summed to one page total.
        total = np.sum(marked * BUBBLEARRAY)
        pdf.place_text(page, str(total), relative_rect=text_box)

    document.save(os.path.join(dest, base_name))
Exemplo n.º 9
0
def test_read_json_qr_robust_fail_multiple():
    """``qr.read_json_qr_robust`` must raise ValueError on the multiple_qrs fixture.

    The first page is read over the whole page rectangle, once with a single
    zoom value and once with a list of zoom values.
    """
    doc = pdf.open_ensuring_pdf("test/fixtures/qr/multiple_qrs.pdf")
    whole_page = (0, 0, 1, 1)

    for zoom in (3, [3, 4, 5]):
        with pytest.raises(ValueError):
            decoded = qr.read_json_qr_robust(doc[0],
                                             relative_rect=whole_page,
                                             zoom=zoom)
            # Only reached when no exception was raised: shows the
            # unexpected result before pytest.raises fails the test.
            print(decoded)
Exemplo n.º 10
0
def key_merge(key, fn_list, output_path, out_fn=None):
    """Merge the documents in *fn_list* into one file inside *output_path*.

    Args:
        key: Used as the output file name when *out_fn* is ``None``.
        fn_list: Paths of the documents to merge, in order.
        output_path: Directory for the result; passed to ``ensure_path``.
        out_fn: Explicit output file name; overrides *key*.

    Returns:
        The full path of the saved file.

    Raises:
        ProcessingError: If opening, merging or saving fails. The original
            exception is chained as ``__cause__``.
    """
    ensure_path(output_path)
    out_fn = key if out_fn is None else out_fn
    full_out_path = os.path.join(output_path, out_fn)

    try:
        docs = [pdf.open_ensuring_pdf(fn) for fn in fn_list]
        out_doc = pdf.merge_pdf(docs)
        out_doc.save(full_out_path)
    except Exception as exc:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being turned into ProcessingError.
        err = ProcessingError(f"Error in keymerge of {fn_list}")
        print(err)
        raise err from exc

    return full_out_path
Exemplo n.º 11
0
def worker(input_):
    """Save every page of a document as its own file, named from its QR data.

    Args:
        input_: A pair ``(fn, dest)``. Each page is written into *dest* as
            ``<st_num>.<doc_id>.<page_index>`` using the values read by
            ``qr.read_json_qr_robust``; the page index is right-aligned to
            three characters and padded with zeros.
    """
    fn, dest = input_
    document = pdf.open_ensuring_pdf(fn)

    for page in document.pages():
        info = qr.read_json_qr_robust(page, relative_rect=(0, .8, .4, 1))
        target = os.path.join(
            dest,
            f"{info['st_num']}.{info['doc_id']}.{info['page_index']:0>3}",
        )
        pdf.doc_from_pages([page]).save(target)
Exemplo n.º 12
0
def fn_burst(source, dest, zoom):
    """Split every file found under *source* into single-page PDFs in *dest*.

    Each page is saved as ``<original file name>.<page index>.pdf``.

    Args:
        source: Directory that is searched recursively for input files.
        dest: Output directory; created when it does not exist yet.
        zoom: Unused; kept so existing callers keep working.
    """
    with suppress(FileExistsError):
        os.makedirs(dest)

    # The recursive "**/*" pattern also yields the sub-directories
    # themselves; only regular files can be opened as documents.
    fns = [str(p) for p in pathlib.Path(source).glob("**/*") if p.is_file()]

    for fn in tqdm(fns):
        true_fn = os.path.basename(fn)
        doc = pdf.open_ensuring_pdf(fn)

        for i, page in enumerate(doc.pages()):
            out_fn = os.path.join(dest, f"{true_fn}.{i}.pdf")
            pdf.doc_from_pages([page]).save(out_fn)
Exemplo n.º 13
0
def clean(source, dest):
    """Merge the files under *source* per bucket key and save them in *dest*.

    Files are grouped with ``itools.bucket`` using ``extract_first_st_num``
    as the key; each group is merged into one document saved as
    ``dest/<key>``. Files with a ``.txt`` suffix are skipped.

    Args:
        source: Directory that is searched recursively for input files.
        dest: Output directory; created when it does not exist yet.

    Raises:
        RuntimeError: Re-raised from merging or saving, after the offending
            key and its file names have been printed.
    """
    print(source, dest)
    with suppress(FileExistsError):
        os.makedirs(dest)

    # A suffix test replaces the former regex r'.*.txt', whose unescaped dot
    # and missing end anchor also dropped any path merely containing "txt".
    # Directories yielded by the recursive glob are skipped as well.
    fns = [
        str(p) for p in pathlib.Path(source).glob("**/*")
        if p.is_file() and p.suffix != ".txt"
    ]
    buckets = itools.bucket(fns, bucket_key=extract_first_st_num)

    for key, bucket_fns in tqdm(list(buckets.items())):
        try:
            doc = pdf.merge_pdf(pdf.open_ensuring_pdf(fn) for fn in bucket_fns)
            doc.save(os.path.join(dest, key))
        except RuntimeError:
            print(key, bucket_fns)
            raise
Exemplo n.º 14
0
def add_page_id_marks(
    fn,
    data_dict,
    output_path,
    out_fn=None,
    rect=(0.05, 0.88, 0.5, 0.96),
    add_page_indices=True,
):
    """Apply ``common.add_page_id_marks`` to the document at *fn* and save it.

    Args:
        fn: Path of the input document.
        data_dict: Passed through to ``common.add_page_id_marks``.
        output_path: Directory for the result; passed to ``ensure_path``.
        out_fn: Output file name; derived from *fn* with
            ``_extract_fn_from_path`` when ``None``.
        rect: Forwarded as ``relative_rect``.
        add_page_indices: Forwarded under the same keyword.

    Returns:
        The full path of the saved file.
    """
    ensure_path(output_path)
    if out_fn is None:
        out_fn = _extract_fn_from_path(fn)
    target = os.path.join(output_path, out_fn)

    marked = common.add_page_id_marks(
        pdf.open_ensuring_pdf(fn),
        data_dict,
        add_page_indices=add_page_indices,
        relative_rect=rect,
    )
    marked.save(target)

    return target
Exemplo n.º 15
0
def worker(input_):
    """Write one 200x200 summary PDF per page of the input document.

    For every page the weighted bubble total and the QR data are read, the
    bubble-array region is cropped to an image, and a new single-page
    document is built that shows the crop next to the QR data as text. The
    output file name is built from the page total and the QR fields
    ``st_num``, ``doc_id`` and ``page_index``.

    Args:
        input_: A pair ``(fn, dest)``: the input path and the directory the
            summary files are saved in.
    """
    fn, dest = input_

    in_doc = pdf.open_ensuring_pdf(fn)

    for p in in_doc.pages():
        bubble_array = bubbles.read_robust(p, (0.8, 0, 1, 0.5))
        page_total = np.sum(BUBBLEARRAY * bubble_array)
        qr_data = qr.read_json_qr_robust(p, relative_rect=(0, 0.8, 0.4, 1))
        array_image = pdf.crop_to_pillow_image(p,
                                               relative_rect=(0.85, 0, 1, 0.6),
                                               zoom=2)

        # Round-trip the cropped image through an in-memory PDF so it can be
        # pasted like any other document; the buffer is released right after
        # its bytes have been handed to fitz.
        with io.BytesIO() as stream:
            array_image.save(stream, format="pdf")
            array_pdf = fitz.open(stream=stream.getvalue(), filetype="pdf")

        out_doc = fitz.open()
        newpage = out_doc.newPage(width=200, height=200)
        pdf.paste_pdf_on(newpage, array_pdf, relative_rect=(0.4, 0, 1, 1))
        pdf.place_text(
            newpage,
            f"{qr_data['st_num']}\n{qr_data['doc_id']}",
            relative_rect=(0.05, 0.1, 0.4, 0.5),
            fontsize=10,
        )
        pdf.place_text(
            newpage,
            pprint.pformat(dict(qr_data)),
            relative_rect=(0.05, 0.4, 0.5, 1),
            fontsize=5,
        )

        out_fn = os.path.join(
            dest,
            f"{page_total:07}.{qr_data['st_num']}.{qr_data['doc_id']}.{qr_data['page_index']:05}.pdf",
        )
        out_doc.save(out_fn)
Exemplo n.º 16
0
def varmerge(source, dest, var_name):
    """Merge the files under *source* grouped by a per-file variation value.

    Each file's base name is looked up in the module-level mapping ``d``;
    the value stored under *var_name* becomes the bucket key and the name of
    the merged output file in *dest*. Files without that entry land in the
    ``"unknown"`` bucket and trigger a warning.

    Args:
        source: Directory that is searched recursively for input files.
        dest: Output directory; created when it does not exist yet.
        var_name: Key looked up in each file's entry of ``d``.
    """
    with suppress(FileExistsError):
        os.makedirs(dest)

    def variation_key(fn):
        # Bucket key for one file: its var_name value, or "unknown".
        _, stnum = os.path.split(fn)

        data = d.get(stnum, {})
        if var_name not in data:
            warnings.warn(f"{var_name} not a key in data")

        return data.get(var_name, "unknown")

    print(f"{source} -> {dest}")

    # The recursive "**/*" pattern also yields the sub-directories
    # themselves; only regular files can be opened as documents.
    fns = [str(p) for p in pathlib.Path(source).glob("**/*") if p.is_file()]
    buckets = itools.bucket(fns, bucket_key=variation_key)

    for key, bucket in tqdm(buckets.items()):
        docs = (pdf.open_ensuring_pdf(fn) for fn in sorted(bucket))
        doc = pdf.merge_pdf(docs)
        doc.save(os.path.join(dest, key))
Exemplo n.º 17
0
def worker(input_):
    """Accumulate per-document bubble totals over all pages of one file.

    Args:
        input_: A pair ``(fn, zoom)``; *zoom* is not used here.

    Returns:
        A ``defaultdict(int)`` holding, after at least one page was read,
        ``"st_num"`` (from the last page's QR data), ``"pagecount"`` and one
        running total per ``doc_id`` found in the QR data.
    """
    fn, zoom = input_
    # The value is overwritten by the QR data of every page below; the call
    # is kept because it still runs (and may raise) for every file.
    st_num = extract_first_st_num(fn)

    totals = collections.defaultdict(int)

    document = pdf.open_ensuring_pdf(fn)
    for page in document.pages():
        marked = bubbles.read_robust(page, (0.8, 0, 1, 0.5))
        page_total = np.sum(marked * BUBBLEARRAY)

        info = qr.read_json_qr_robust(page, relative_rect=(0, 0.8, 0.4, 1))

        st_num = info["st_num"]
        doc_id = info["doc_id"]
        # Not used afterwards, but a missing key still raises as before.
        _page_index = info["page_index"]

        totals["st_num"] = st_num
        totals[doc_id] += page_total
        totals["pagecount"] += 1

    return totals
Exemplo n.º 18
0
def worker(inputs_):
    """Merge one bucket of files into ``<key>.pdf`` inside *dest*.

    Args:
        inputs_: A triple ``(key, bucket, dest)``; the paths in *bucket* are
            opened in sorted order before merging.
    """
    key, bucket, dest = inputs_

    opened = [pdf.open_ensuring_pdf(path) for path in sorted(bucket)]
    merged = pdf.merge_pdf(opened)
    merged.save(os.path.join(dest, f"{key}.pdf"))
Exemplo n.º 19
0
from frow.tools import pdf

# Paste bubble_array.pdf onto every page of input.pdf and write output.pdf.
doc = pdf.open_ensuring_pdf("input.pdf")
bubble_pdf = pdf.open_ensuring_pdf("bubble_array.pdf")

out_doc = pdf.paste_pdf_on_every_page(
    doc,
    bubble_pdf,
    relative_rect=(.87, .03, .98, .5),
)
out_doc.save("output.pdf")
Exemplo n.º 20
0
from frow.tools import pdf, common, bubbles

# Build output.pdf from four copies of the first page of input.pdf.
doc = pdf.open_ensuring_pdf("input.pdf")
pages = list(doc.pages())

# The same page object is referenced four times, as with list repetition.
out_doc = pdf.doc_from_pages([pages[0] for _ in range(4)])
out_doc.save("output.pdf")
Exemplo n.º 21
0
def read_id_marks(fn, rect=(0, 0.8, 0.4, 1)):
    """Read the page id mark of every page in a document.

    Args:
        fn: Path of the document; opened with ``pdf.open_ensuring_pdf``.
        rect: Forwarded as ``rel_rect`` to ``page_marks.read_page_id_mark``.

    Returns:
        A tuple ``(fn, id_data)`` where *id_data* is a list with one
        ``page_marks.read_page_id_mark`` result per page, in page order.
    """
    document = pdf.open_ensuring_pdf(fn)

    id_data = []
    for page in document.pages():
        id_data.append(page_marks.read_page_id_mark(page, rel_rect=rect))

    return (fn, id_data)