Exemplo n.º 1
0
def convertAnnotatedPDF(fname, refNrPath, origPDF):
    """
    Convert a PDF and its annotations into one PDF.

    Every page listed in ``refNrPath + ".content"`` is rendered to SVG with
    ``rm2svg`` (pages without an ``.rm`` file use an empty placeholder),
    converted to a one-page PDF with ``rsvg-convert``, merged with
    ``convert`` (presumably ImageMagick) and stamped onto the original with
    ``pdftk multistamp``. The result is written next to the original as
    ``<origPDF without extension>_annot.pdf``.

    Args:
        fname: Name of the document; only used in the progress message.
        refNrPath: Path prefix of the document data. ``refNrPath + ".content"``
            is read as JSON and ``refNrPath + "/" + <page hash> + ".rm"`` is
            looked up for every page.
        origPDF: Path of the PDF the annotations belong to.

    Returns:
        True once all steps have run. The exit status of the external tools
        is not inspected.

    Raises:
        FileNotFoundError: If one of the external programs is not installed.
    """
    # Local import: the commands are run without a shell so that paths
    # containing spaces or shell metacharacters are passed through intact.
    import subprocess

    # tempDir is where the intermediate files are stored
    os.makedirs("tempDir", exist_ok=True)
    try:
        print(fname + " is being exported.")

        # get the page size of the original pdf
        with open(origPDF, "rb") as pdf_file:
            pdfsize = PdfFileReader(pdf_file).getPage(0).mediaBox
            pdfx = int(pdfsize[2])
            pdfy = int(pdfsize[3])

        # rM will not create a file when the page is empty, so this is a
        # placeholder empty file to use.
        rm2svg(emptyRm,
               "tempDir/emptyrm.svg",
               coloured_annotations=True,
               x_width=pdfx,
               y_width=pdfy)

        # find what the page hashes are
        with open(refNrPath + ".content") as content_file:
            content = json.load(content_file)

        # convert all pages
        pdflist = []
        for pg, pg_hash in enumerate(content['pages']):
            rmpath = refNrPath + "/" + pg_hash + ".rm"
            if os.path.isfile(rmpath):
                svg_path = "tempDir/temprm" + str(pg) + ".svg"
                rm2svg(rmpath,
                       svg_path,
                       coloured_annotations=False,
                       x_width=pdfx,
                       y_width=pdfy)
            else:
                svg_path = "tempDir/emptyrm.svg"
            page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
            subprocess.run(
                ["rsvg-convert", "-f", "pdf", "-o", page_pdf, svg_path])
            pdflist.append(page_pdf)

        # merge the annotated pages
        merged_rm = "tempDir/merged_rm.pdf"
        subprocess.run(["convert"] + pdflist + [merged_rm])

        # stamp extracted annotations onto original with pdftk
        annotated_pdf = os.path.splitext(origPDF)[0] + "_annot.pdf"
        subprocess.run([
            "pdftk", origPDF, "multistamp", merged_rm, "output", annotated_pdf
        ])
    finally:
        # Remove temporary files, also when a step above raised
        shutil.rmtree("tempDir", ignore_errors=False)
    return True
Exemplo n.º 2
0
def convertNotebook(fname, refNrPath):
    """
    Convert a notebook to a PDF from its annotations and template backgrounds.

    The template named on each line of ``refNrPath + ".pagedata"`` is
    converted from SVG to PDF and the results are merged into one background
    PDF. Every page listed in ``refNrPath + ".content"`` is rendered with
    ``rm2svg`` and ``rsvg-convert``, the pages are merged, and the merged
    annotations are stamped onto the merged backgrounds with
    ``pdftk multistamp``. The output is written to
    ``syncDirectory + "/Notes/" + fname + ".pdf"``.

    Args:
        fname: Notebook name; used as the output file name.
        refNrPath: Path prefix of the notebook data (``.pagedata``,
            ``.content`` and the directory holding the ``.rm`` page files).

    Returns:
        True once all steps have run. The exit status of the external tools
        is not inspected.

    Raises:
        FileNotFoundError: If one of the external programs is not installed,
            or if the merged background PDF was not produced.
    """
    # Local import: the commands are run without a shell so that notebook
    # and template names containing spaces need no manual escaping.
    import subprocess

    # tempDir is where the intermediate files are stored
    os.makedirs('tempDir', exist_ok=True)
    try:
        # get list of backgrounds
        with open(refNrPath + ".pagedata") as file:
            backgrounds = [line.strip() for line in file]

        # The commands used to go through the shell, which expanded a leading
        # "~" in these configured paths; keep that behaviour.
        template_dir = os.path.expanduser(str(bgPath))
        output_dir = os.path.expanduser(syncDirectory)

        bglist = []
        for bg_pg, bg in enumerate(backgrounds):
            bg_pdf = "tempDir/bg_" + str(bg_pg) + ".pdf"
            subprocess.run([
                "rsvg-convert", "-f", "pdf", "-o", bg_pdf,
                template_dir + bg + ".svg"
            ])
            bglist.append(bg_pdf)
        merged_bg = "tempDir/merged_bg.pdf"
        subprocess.run(["convert"] + bglist + [merged_bg])

        # The page size is not used below; reading the first page is kept so
        # that a failed background merge still surfaces here.
        with open(merged_bg, 'rb') as merged_file:
            PdfFileReader(merged_file).getPage(0)

        # find out the page hashes
        with open(refNrPath + ".content") as content_file:
            content = json.load(content_file)

        # Now convert all pages
        pdflist = []
        for pg, pg_hash in enumerate(content['pages']):
            rmpath = refNrPath + "/" + pg_hash + ".rm"
            svg_path = "tempDir/temprm" + str(pg) + ".svg"
            # Skip the page if it doesn't exist anymore. This is fine in
            # notebooks because nobody cares about the rM numbering.
            # Only rm2svg is guarded, so a missing external program is not
            # mistaken for a missing page.
            try:
                rm2svg(rmpath, svg_path, coloured_annotations=True)
            except FileNotFoundError:
                continue
            page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
            subprocess.run(
                ["rsvg-convert", "-f", "pdf", "-o", page_pdf, svg_path])
            pdflist.append(page_pdf)

        # merge all annotation pages
        merged_rm = "tempDir/merged_rm.pdf"
        subprocess.run(["convert"] + pdflist + [merged_rm])

        # combine with background
        subprocess.run([
            "pdftk", merged_bg, "multistamp", merged_rm, "output",
            output_dir + "/Notes/" + fname + ".pdf"
        ])
    finally:
        # Delete temp directory, also when a step above raised
        shutil.rmtree("tempDir", ignore_errors=False)
    return True
Exemplo n.º 3
0
def convertAnnotatedPDF(fname, refNrPath, origPDF, verbose=False):
    """
    Convert a PDF and its annotations into one PDF.

    If the page ratio of the original differs from the 1872x1404 tablet
    ratio, the original is first passed through ``resize_pages`` and the
    resized copy is used as the stamping target; otherwise the original is
    used directly. Every page listed in ``refNrPath + ".content"`` is rendered
    with ``rm2svg`` and ``rsvg-convert``, the pages are merged with
    ``convert`` and stamped onto the target with ``pdftk multistamp``. The
    result is written as ``<origPDF without extension>_annot.pdf``.

    Args:
        fname: Name of the document; only used in the progress message.
        refNrPath: Path prefix of the document data (``.content`` JSON and
            the directory holding the ``.rm`` page files).
        origPDF: Path of the PDF the annotations belong to.
        verbose: When true, print progress and the external commands.

    Returns:
        False (after a warning) if ``origPDF`` cannot be opened, otherwise
        True once all steps have run. The exit status of the external tools
        is not inspected.

    Raises:
        FileNotFoundError: If one of the external programs is not installed.
    """
    # Local import: the commands are run without a shell so that paths
    # containing spaces or shell metacharacters are passed through intact.
    import subprocess

    # tempDir is where the intermediate files are stored
    os.makedirs("tempDir", exist_ok=True)
    try:
        print(fname + " is being exported.")

        # get info on origin pdf
        try:
            input1 = pikepdf.Pdf.open(origPDF)
        except Exception:
            warnings.warn("could not read " + origPDF)
            return False
        with input1:
            pdfsize = input1.pages[0].trimbox
            pdfx = int(pdfsize[2])
            pdfy = int(pdfsize[3])
        if verbose:
            print(f"the pdf has size {pdfx}x{pdfy}")

        # rM will not create a file when the page is empty, so this is a
        # placeholder empty file to use.
        rm2svg(emptyRm, "tempDir/emptyrm.svg", coloured_annotations=True)

        ratio_rm = 1872 / 1404
        ratio_pdf = pdfy / pdfx
        # rotate landscape pdfs
        landscape = ratio_pdf < 1
        if landscape and verbose:
            print(f"its landscape format {ratio_pdf}")

        # Stamp onto the original unless it has to be resized. Without this
        # default a PDF that already has the tablet ratio had no target.
        resizedPDF = origPDF
        if ratio_pdf != ratio_rm:
            pdf_obj = resize_pages(origPDF, landscape, verbose=verbose)
            # Must be the same spelling as the directory created above;
            # "tempdir" does not exist on case-sensitive filesystems.
            resizedPDF = "tempDir/resized.pdf"
            pdf_obj.save(resizedPDF)

        # find what the page hashes are
        with open(refNrPath + ".content") as content_file:
            content = json.load(content_file)

        # convert all pages
        pdflist = []
        for pg, pg_hash in enumerate(content['pages']):
            if verbose:
                print(f"converting page {pg}")
            rmpath = refNrPath + "/" + pg_hash + ".rm"
            if os.path.isfile(rmpath):
                svg_path = "tempDir/temprm" + str(pg) + ".svg"
                rm2svg(rmpath, svg_path, coloured_annotations=False)
            else:
                svg_path = "tempDir/emptyrm.svg"
            page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
            subprocess.run(
                ["rsvg-convert", "-f", "pdf", "-a", "-o", page_pdf, svg_path])
            pdflist.append(page_pdf)

        # Merging at high quality gets very slow, so quality is decreased for
        # big files. The page count is used instead of the loop index so that
        # a document without pages does not fail here (same threshold as
        # "last index > 80").
        density_pdf = 200
        quality_pdf = 80
        if len(pdflist) > 81:
            density_pdf = 100
            quality_pdf = 50

        # merge the annotated pages
        merged_rm = "tempDir/merged_rm.pdf"
        mergeCmd = [
            "convert", "-density", f"{density_pdf}x{density_pdf}", "-quality",
            str(quality_pdf)
        ] + pdflist + [merged_rm]
        if verbose:
            print("Doing Merge: " + " ".join(mergeCmd))
        subprocess.run(mergeCmd)

        # stamp extracted annotations onto original with pdftk
        stampCmd = [
            "pdftk", resizedPDF, "multistamp", merged_rm, "output",
            os.path.splitext(origPDF)[0] + "_annot.pdf"
        ]
        if verbose:
            print("Doing Stamp: " + " ".join(stampCmd))
        subprocess.run(stampCmd)
    finally:
        # Remove temporary files, also on the early return and on errors
        shutil.rmtree("tempDir", ignore_errors=False)
    return True
Exemplo n.º 4
0
def convertAnnotatedPDF(fname, refNrPath, origPDF):
    """
    Convert a PDF and its annotations into one PDF.

    The original is brought to the 1872x1404 tablet page ratio with
    Ghostscript: landscape documents are rotated with ``pdftk`` first and
    rotated back afterwards. Every page listed in ``refNrPath + ".content"``
    is rendered with ``rm2svg`` and ``rsvg-convert``, the pages are merged
    with ``convert`` and stamped onto the resized original with
    ``pdftk multistamp``. The result is written as
    ``<origPDF without extension>_annot.pdf``.

    Args:
        fname: Name of the document; only used in the progress message.
        refNrPath: Path prefix of the document data (``.content`` JSON and
            the directory holding the ``.rm`` page files).
        origPDF: Path of the PDF the annotations belong to.

    Returns:
        False (after a warning) if ``origPDF`` cannot be opened or parsed,
        otherwise True once all steps have run. The exit status of the
        external tools is not inspected.

    Raises:
        FileNotFoundError: If one of the external programs is not installed.
    """
    # Local import: the commands are run without a shell so that paths
    # containing spaces or shell metacharacters are passed through intact.
    import subprocess

    # tempDir is where the intermediate files are stored
    os.makedirs("tempDir", exist_ok=True)
    try:
        print(fname+" is being exported.")

        # get info on origin pdf
        try:
            pdf_file = open(origPDF, "rb")
        except OSError:
            warnings.warn("could not read " + origPDF)
            return False
        with pdf_file:
            try:
                input1 = PdfFileReader(pdf_file)
            except Exception:
                warnings.warn("could not read " + origPDF)
                return False
            pdfsize = input1.getPage(0).mediaBox
            pdfx = int(pdfsize[2])
            pdfy = int(pdfsize[3])

        # rM will not create a file when the page is empty, so this is a
        # placeholder empty file to use.
        rm2svg(emptyRm, "tempDir/emptyrm.svg", coloured_annotations=True)

        ratio_rm = 1872 / 1404
        ratio_pdf = pdfy / pdfx
        # rotate landscape pdfs
        landscape = ratio_pdf < 1
        if landscape:
            # Every intermediate file uses the "tempDir" spelling of the
            # directory created above; "tempdir" is a different (missing)
            # directory on case-sensitive filesystems.
            resizedPDF = "tempDir/rotated.pdf"
            subprocess.run(
                ["pdftk", origPDF, "cat", "1-endwest", "output", resizedPDF])
            pdfx, pdfy = pdfy, pdfx
            ratio_pdf = pdfy / pdfx
        else:
            resizedPDF = origPDF

        # The side that already fits is kept; the other one is recomputed
        # from the tablet ratio.
        if ratio_pdf < ratio_rm:
            keep_side = "x"
            change_side = "y"
        else:
            keep_side = "y"
            change_side = "x"

        size = {"x": pdfx, "y": pdfy}
        keep_side_is = size[keep_side]
        change_side_is = size[change_side]

        if keep_side_is < change_side_is:
            change_side_should = keep_side_is * ratio_rm
        else:
            change_side_should = keep_side_is / ratio_rm

        # Content is only shifted when the page grows in y.
        yoffset = change_side_should - change_side_is if change_side == "y" else 0

        if change_side == "y":
            output_size = [keep_side_is, change_side_should]
        else:
            output_size = [change_side_should, keep_side_is]

        # The trailing "0" multiplies each dimension by ten; presumably this
        # matches pdfwrite's default resolution of 720 dpi (10 device pixels
        # per point) -- TODO confirm.
        geometry = "-g" + str(int(output_size[0])) + "0x" + str(int(output_size[1])) + "0"
        subprocess.run([
            "gs", "-o", "tempDir/resized_org.pdf", "-sDEVICE=pdfwrite",
            geometry, "-c",
            "<</PageOffset [0 " + str(int(yoffset)) + "]>> setpagedevice",
            "-f", resizedPDF
        ])
        if landscape:
            # rotate back to the original orientation
            resizedPDF = "tempDir/resized_org2.pdf"
            subprocess.run([
                "pdftk", "tempDir/resized_org.pdf", "cat", "1-endnorth",
                "output", resizedPDF
            ])
        else:
            resizedPDF = "tempDir/resized_org.pdf"

        # find what the page hashes are
        with open(refNrPath + ".content") as content_file:
            content = json.load(content_file)

        # convert all pages
        pdflist = []
        for pg, pg_hash in enumerate(content['pages']):
            rmpath = refNrPath + "/" + pg_hash + ".rm"
            if os.path.isfile(rmpath):
                svg_path = "tempDir/temprm" + str(pg) + ".svg"
                rm2svg(rmpath, svg_path, coloured_annotations=False)
            else:
                svg_path = "tempDir/emptyrm.svg"
            page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
            subprocess.run(
                ["rsvg-convert", "-f", "pdf", "-a", "-o", page_pdf, svg_path])
            pdflist.append(page_pdf)

        # merge the annotated pages
        merged_rm = "tempDir/merged_rm.pdf"
        subprocess.run(
            ["convert", "-density", "200x200", "-quality", "80"]
            + pdflist + [merged_rm])

        # stamp extracted annotations onto original with pdftk
        subprocess.run([
            "pdftk", resizedPDF, "multistamp", merged_rm, "output",
            os.path.splitext(origPDF)[0] + "_annot.pdf"
        ])
    finally:
        # Remove temporary files, also on the early return and on errors
        shutil.rmtree("tempDir", ignore_errors=False)
    return True