Пример #1
0
 def bbox(self):
     """Return the union of the ``bbox`` of every item in ``self._instances``.

     The accumulation starts from a default-constructed ``fitz.Rect`` and is
     returned unchanged when there are no instances.
     """
     union = fitz.Rect()
     for item in self._instances:
         union |= item.bbox
     return union
Пример #2
0
def trans_pdf(file_name, path):
    """Translate the PDF at ``path`` and write a translated PDF and DOCX.

    For every page of the source document the embedded images are exported
    to temporary PNG files, a new page of the same size is created, image
    blocks are re-inserted at their original position and text blocks are
    sent through ``translate_func.bing_translate`` and written back with the
    SimSun font. Text blocks whose vertical gap is at most 1.0 are merged
    and translated as one paragraph.

    Args:
        file_name: Name used to build the output names
            ``translated_<file_name>`` (PDF) and
            ``translated_<file_name[:-4]>.docx`` -- assumes a four-character
            extension such as ``.pdf``; TODO confirm against callers.
        path: Location of the PDF to translate, passed to ``fitz.open``.

    Returns:
        None. The results are saved below
        ``settings.BASE_DIR/trans/output_file``.

    NOTE(review): ``is_figure``, ``is_reference``, ``translate_func`` and
    ``settings`` are not defined in this block and must come from elsewhere.
    """
    t0 = time.time()
    cur_pdf = fitz.open(path)  # PDF to be translated
    new_pdf = fitz.open()  # output PDF that receives the translated pages
    new_docx = Document()  # output DOCX that receives the translated text
    new_docx.styles['Normal'].font.name = u'宋体'  # font used for the translated text
    new_docx.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')  # same font for the East Asian font slot
    i = 0  # running page index
    bytes_array = 0  # NOTE(review): never read in this function
    try:
        for cur_page in cur_pdf:
            img_list = cur_page.getImageList()  # image entries of the current page
            print(img_list)
            imgcount = 0
            for img in img_list:  # export every image of the current page
                pix_temp1 = fitz.Pixmap(cur_pdf, img[0])
                if img[1]:
                    # A second reference is present: use its samples as alpha channel.
                    pix_temp2 = fitz.Pixmap(cur_pdf, img[1])
                    pix_temp = fitz.Pixmap(pix_temp1)
                    pix_temp.setAlpha(pix_temp2.samples)
                else:
                    pix_temp = pix_temp1
                print('当前页面的图像:::', pix_temp)
                imgcount += 1
                new_name = "图片{}.png".format(imgcount)  # name of the exported image file
                pix_temp.writeImage(os.path.join(settings.BASE_DIR, 'trans', 'output_file', new_name))
                # bytes_array = pix_temp.getImageData('png')  # alternative: pass the bytes on without writing a file
                # print(pix_temp.getImageData('png'))
                pix_temp = None  # release the pixmap
            blks = cur_page.getTextBlocks(images=True)  # read text blocks of input page
            new_page = new_pdf.newPage(-1, width=cur_page.MediaBoxSize[0],
                                    height=cur_page.MediaBoxSize[1])  # new page with the same size as the source page
            img = new_page.newShape()  # prepare /Contents object
            disp = fitz.Rect(cur_page.CropBoxPosition, cur_page.CropBoxPosition)
            croprect = cur_page.rect + disp
            # img.drawRect(croprect)  # draw the rectangle of the whole page
            # img.finish(color=gray, fill=gray)  # fill colour
            begin = (0, 0, 0, 0)  # rectangle of the first block of the current paragraph
            end = (0, 0, 0, 0)  # rectangle of the last block merged into the current paragraph
            flag = 0  # 1 while adjacent blocks are being merged into one paragraph
            reference_flag = 0  # 1 once the references heading has been seen on this page
            blks.append((1, 2, 3, 6))  # sentinel so that blks[num + 1] exists for the last real block
            content = ""
            imgcount = 0  # restart the counter: it now indexes the exported image files
            fonts = 9
            for num in range(len(blks)):  # loop through the blocks
                # The last entry is the sentinel appended above, so stop there.
                if num == len(blks) - 1:
                    break
                # This block holds an image.
                if blks[num][-1] == 1:
                    print('图像:::',blks[num][4])
                    imgcount += 1

                    img_r = blks[num][:4]  # rectangle where the image is to be placed
                    try:
                        path_img = os.path.join(settings.BASE_DIR, 'trans', 'output_file',
                                            '图片{}.png'.format(imgcount))  # file of the n-th image of this page
                        img = open(path_img, "rb").read()  # image bytes (note: rebinds `img`, previously the shape)
                        new_page.insertImage(img_r, stream=img, keep_proportion=True)  # place it on the new PDF page
                        new_docx.add_picture(path_img, width=Inches(3))  # fixed picture width in the DOCX
                        os.remove(path_img)  # remove the temporary file once inserted
                    except:
                        # NOTE(review): any failure while re-inserting the image is silently ignored.
                        pass
                    continue # skip the text handling below

                # Default font size and alignment.
                if i == 0:  # first page
                    if num == 0 or num == 1:
                        fonts = 15
                        text_pos = fitz.TEXT_ALIGN_CENTER  # title, authors, affiliation are usually centred
                    elif num == 2:
                        fonts = 10
                        text_pos = fitz.TEXT_ALIGN_CENTER  # title, authors, affiliation are usually centred
                    elif num == 3:
                        fonts = 10
                        text_pos = fitz.TEXT_ALIGN_CENTER  # title, authors, affiliation are usually centred
                    else:
                        fonts = 10
                        text_pos = fitz.TEXT_ALIGN_LEFT  # left-align the text inside its rectangle
                else:
                    fonts = 10
                    text_pos = fitz.TEXT_ALIGN_LEFT  # left-align the text inside its rectangle
                # Remember the coordinates and text of the very first block.
                if num == 0:
                    begin = blks[0][:4]
                    content = blks[0][4].replace("\n", " ")
                # Block rectangle: b[0], b[1] is the top-left corner, b[2], b[3] the bottom-right corner.
                r = fitz.Rect(blks[num][:4])
                # Always true here because the sentinel index already left the loop above.
                if num < len(blks) - 1:
                    # The next block starts within 1.0 of this block's bottom edge
                    # (next block's top y compared with this block's bottom y).
                    if (abs(blks[num + 1][1] - blks[num][3]) <= 1.0 and abs(
                            blks[num + 1][1] - blks[num][3]) >= 0):
                        # The current block comes after the references heading.
                        if reference_flag == 1:
                            trans_pragraph = blks[num][4].replace("\n", " ")
                            res = translate_func.bing_translate(trans_pragraph).replace(' ', '')
                            new_page.insertTextbox(r, res, fontname="song", fontfile=os.path.join(settings.BASE_DIR,
                                                                                                  'trans/static/fonts/SimSun.ttf'),
                                                   fontsize=7, align=text_pos)
                        # Otherwise merge the next block into the current paragraph.
                        else:
                            flag = 1
                            # Keep the next rectangle; its bottom-right corner bounds the merged paragraph.
                            end = blks[num + 1][:4]
                            content += blks[num + 1][4].replace("\n", " ")
                            # print('content::',content)

                    # The two blocks are far apart vertically.
                    else:
                        if flag == 1:
                            # img.drawRect(fitz.Rect(end[0],begin[1],end[2],end[3]))
                            res = translate_func.bing_translate(content).replace(' ', '')  # strip spaces from the translation
                            new_docx.add_paragraph(res)  # append to the new DOCX document
                            # print('content:',content)
                            # print(res)
                            # fitz.Rect(end[0],begin[1],end[2],end[3]) is the extended rectangle
                            # NOTE(review): the original comment described the opposite comparison;
                            # the code takes this branch when the start rectangle reaches further right.
                            if begin[2] > end[2]:
                                new_page.insertTextbox(fitz.Rect(end[0], begin[1], begin[2], end[3]), res, fontname="song",
                                                    fontfile=os.path.join(settings.BASE_DIR,
                                                                          'trans/static/fonts/SimSun.ttf'),
                                                    fontsize=fonts, align=text_pos)
                            else:
                                new_page.insertTextbox(fitz.Rect(end[0], begin[1], end[2], end[3]), res, fontname="song",
                                                    fontfile=os.path.join(settings.BASE_DIR,
                                                                          'trans/static/fonts/SimSun.ttf'),
                                                    fontsize=fonts, align=text_pos)
                            flag = 0
                        else:
                            # img.drawRect(r)
                            trans_pragraph = blks[num][4].replace("\n", " ")  # replace line breaks in the source text with spaces
                            if is_figure(trans_pragraph.replace(' ','')):  # is this block a figure caption?
                                res = translate_func.bing_translate(trans_pragraph).replace(' ', '')  # strip spaces from the translation
                                new_page.insertTextbox(r, res, fontname="song", fontfile=os.path.join(settings.BASE_DIR,
                                                                                                   'trans/static/fonts/SimSun.ttf'),
                                                    fontsize=7, align=fitz.TEXT_ALIGN_CENTER)
                            # From here on, blocks are treated as references.
                            elif is_reference(trans_pragraph.replace(' ','')):
                                reference_flag = 1
                                new_page.insertTextbox(r, '参考文献', fontname="song", fontfile=os.path.join(settings.BASE_DIR,
                                                                                                   'trans/static/fonts/SimSun.ttf'),
                                                    fontsize=fonts, align=text_pos)
                            else:
                                # Strip spaces from the translation.
                                res = translate_func.bing_translate(trans_pragraph).replace(' ', '')
                                # Append to the new DOCX document.
                                new_docx.add_paragraph(res)
                                if reference_flag == 1:
                                    new_page.insertTextbox(r, res, fontname="song", fontfile=os.path.join(settings.BASE_DIR,
                                                                                                          'trans/static/fonts/SimSun.ttf'),
                                                           fontsize=7, align=text_pos)
                                else:

                                    new_page.insertTextbox(r, res, fontname="song", fontfile=os.path.join(settings.BASE_DIR,
                                                                                                   'trans/static/fonts/SimSun.ttf'),
                                                    fontsize=fonts, align=text_pos)
                        # The next block starts a new paragraph: remember its rectangle.
                        begin = blks[num + 1][:4]
                        try:
                            content = blks[num + 1][4].replace("\n", " ")
                            # print('content:::',content)
                        except:
                            # The sentinel tuple has no index 4, so this fails for the last real block.
                            pass
                            # print('failed to record content!')
                        # img.finish(width=0.3)
                        # img.commit()
            i += 1
    except:  # on any error, save what has been translated so far so the work is not lost
        # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit, and
        # execution falls through to the regular save below, so both files are written twice.
        new_file_name = os.path.join(settings.BASE_DIR, 'trans', 'output_file', 'translated_' + file_name)  # output path of the translated PDF
        new_docx_name = os.path.join(settings.BASE_DIR, 'trans', 'output_file',
                                     'translated_' + file_name[:-4] + '.docx')  # output path of the translated DOCX
        new_docx.save(new_docx_name)  # save the translated DOCX
        new_pdf.save(new_file_name, garbage=4, deflate=True, clean=True)  # save the translated PDF
        print('翻译过程出现异常')
    # Save the files.
    new_file_name = os.path.join(settings.BASE_DIR, 'trans', 'output_file', 'translated_' + file_name)  # output path of the translated PDF
    new_docx_name = os.path.join(settings.BASE_DIR, 'trans', 'output_file',
                                 'translated_' + file_name[:-4] + '.docx')  # output path of the translated DOCX
    new_docx.save(new_docx_name)  # save the translated DOCX
    new_pdf.save(new_file_name, garbage=4, deflate=True, clean=True)  # save the translated PDF
    t1 = time.time()
    print("Total translation time: %g sec" % (t1 - t0))
Пример #3
0
def run_remarks(
    input_dir,
    output_dir,
    targets=None,
    pdf_name=None,
    ann_type=None,
    combined_pdf=False,
    modified_pdf=False,
):
    """Export the annotations of every PDF document found in ``input_dir``.

    Each ``*.metadata`` file in ``input_dir`` that ``is_document`` accepts and
    whose file type is ``pdf`` is processed page by page: the annotation file
    of a page is parsed, rescaled and drawn on top of the original PDF page,
    and the result is written in every requested target format.

    Args:
        input_dir: Directory containing the ``*.metadata`` files and PDFs.
        output_dir: Directory below which the per-document output is created.
        targets: Container of output formats; the values checked are
            ``"svg"``, ``"pdf"``, ``"png"`` and ``"md"``. ``None`` (the
            default) means no per-page output files.
        pdf_name: When given, only documents whose visible name contains this
            string are processed.
        ann_type: ``"highlights"`` or ``"scribbles"`` to restrict the
            annotations; any other value merges both kinds.
        combined_pdf: When true, save the source PDF with the annotations
            applied as ``<output_dir>/<name> _remarks.pdf``.
        modified_pdf: When true, save only the annotated pages as
            ``<output_dir>/<name> _remarks-only.pdf``.

    Returns:
        None.
    """
    # The default used to be passed straight to `"svg" in targets`, which
    # raises TypeError for None; treat None as "no targets requested".
    if targets is None:
        targets = ()

    for path in pathlib.Path(f"{input_dir}/").glob("*.metadata"):
        if not is_document(path):
            continue

        filetype = get_document_filetype(path)
        if filetype == 'pdf':
            pages = list_pages_uuids(path)
            name = get_visible_name(path)
            rm_files = list_ann_rm_files(path)

            if pdf_name and (pdf_name not in name):
                continue

            # Nothing to do for documents without pages, name or annotations.
            if not pages or not name or not rm_files or not len(rm_files):
                continue

            # Number of decimal digits of the page count, used to zero-pad
            # the per-page output file names.
            page_magnitude = math.floor(math.log10(len(pages))) + 1
            in_device_path = get_ui_path(path)

            out_path = pathlib.Path(f"{output_dir}/{in_device_path}/{name}/")
            out_path.mkdir(parents=True, exist_ok=True)

            pdf_src = fitz.open(path.with_name(f"{path.stem}.pdf"))

            if modified_pdf:
                mod_pdf = fitz.open()

            print(f"Working on PDF file: {path.stem}")
            print(f'PDF visibleName: "{name}"')
            print(f"PDF in-device directory: {in_device_path}")

            for rm_file in rm_files:
                # The annotation file is named after the page it belongs to.
                page_idx = pages.index(f"{rm_file.stem}")

                pdf_w, pdf_h = get_pdf_page_dims(path, page_idx=page_idx)
                scale = get_pdf_to_device_ratio(pdf_w, pdf_h)

                highlights, scribbles = parse_rm_file(rm_file)

                if ann_type == "highlights":
                    parsed_data = highlights
                elif ann_type == "scribbles":
                    parsed_data = scribbles
                else:  # merge both annotation types
                    parsed_data = {
                        "layers": highlights["layers"] + scribbles["layers"]
                    }

                if not parsed_data.get("layers"):
                    continue

                parsed_data = rescale_parsed_data(parsed_data, scale)

                if "svg" in targets:
                    svg_str = draw_svg(parsed_data)

                    subdir = prepare_subdir(out_path, "svg")
                    with open(f"{subdir}/{page_idx:0{page_magnitude}}.svg",
                              "w") as f:
                        f.write(svg_str)

                # Single-page scratch document holding the original page
                # with the annotations drawn on top.
                ann_doc = fitz.open()

                rm_w_rescaled, rm_h_scaled = get_rescaled_device_dims(scale)
                ann_page = ann_doc.newPage(width=rm_w_rescaled,
                                           height=rm_h_scaled)

                pdf_w_adj, pdf_h_adj = get_adjusted_pdf_dims(
                    pdf_w, pdf_h, scale)
                pdf_rect = fitz.Rect(0, 0, pdf_w_adj, pdf_h_adj)

                ann_page.showPDFpage(pdf_rect, pdf_src, pno=page_idx)

                should_extract_text = ann_type != "scribbles" and highlights
                extractable = is_text_extractable(pdf_src[page_idx])
                ocred = False

                if should_extract_text and not extractable and is_tool(
                        "ocrmypdf"):
                    print(
                        f"Couldn't extract text from page #{page_idx}. Will OCR it. Hold on\n"
                    )

                    # Round-trip the page through a temporary file for OCR.
                    tmp_file = "_tmp.pdf"
                    ann_doc.save(tmp_file)
                    ann_doc.close()

                    # Note: as of July 2020, ocrmypdf does not recognize handwriting
                    tmp_file = run_ocr(tmp_file)

                    ann_doc = fitz.open(tmp_file)
                    pathlib.Path(tmp_file).unlink()

                    ann_page = ann_doc[0]
                    ocred = True

                ann_page = draw_pdf(parsed_data, ann_page)

                if "pdf" in targets:
                    subdir = prepare_subdir(out_path, "pdf")
                    ann_doc.save(f"{subdir}/{page_idx:0{page_magnitude}}.pdf")

                if "png" in targets:
                    # (2, 2) is a short-hand for 2x zoom on x and y
                    # ref: https://pymupdf.readthedocs.io/en/latest/page.html#Page.getPixmap
                    pixmap = ann_page.getPixmap(matrix=fitz.Matrix(2, 2))

                    subdir = prepare_subdir(out_path, "png")
                    pixmap.writePNG(
                        f"{subdir}/{page_idx:0{page_magnitude}}.png")

                if "md" in targets:
                    if should_extract_text and (extractable or ocred):
                        md_str = md_from_blocks(ann_page)
                        # TODO: add proper table extraction?
                        # https://pymupdf.readthedocs.io/en/latest/faq.html#how-to-extract-tables-from-documents

                        # TODO: maybe also add highlighted image (pixmap) extraction?

                        subdir = prepare_subdir(out_path, "md")
                        with open(f"{subdir}/{page_idx:0{page_magnitude}}.md",
                                  "w") as f:
                            f.write(md_str)

                    elif not highlights:
                        print(
                            f"Couldn't find any highlighted text on page #{page_idx}"
                        )
                    elif ann_type == "scribbles":
                        print(
                            "Found some highlighted text but `--ann_type` flag was set to `scribbles` only"
                        )
                    else:
                        print(
                            f"Found highlighted text but couldn't create markdown from page #{page_idx}"
                        )

                if modified_pdf:
                    mod_pdf.insertPDF(ann_doc, start_at=-1)

                if combined_pdf:
                    x_max, y_max = get_ann_max_bound(parsed_data)
                    ann_outside = (x_max > pdf_w_adj) or (y_max > pdf_h_adj)

                    # If there are annotations outside the original PDF page limits,
                    # insert the ann_page that we have created from scratch
                    if ann_outside:
                        pdf_src.insertPDF(ann_doc, start_at=page_idx)
                        pdf_src.deletePage(page_idx + 1)

                    # Else, draw annotations in the original PDF page (in-place)
                    # to preserve links (and also the original page size)
                    else:
                        draw_pdf(parsed_data, pdf_src[page_idx], inplace=True)

                ann_doc.close()

            if combined_pdf:
                pdf_src.save(f"{output_dir}/{name} _remarks.pdf")

            if modified_pdf:
                mod_pdf.save(f"{output_dir}/{name} _remarks-only.pdf")
                mod_pdf.close()

            pdf_src.close()
        else:
            print(
                f"Skipping document {path.stem}: document type: {filetype} is currently not supported."
            )
Пример #4
0
 def find_words_rect(page, *words):
     """Zip together the rectangles of the requested words on ``page``.

     Every entry of ``page.getText("words")`` is unpacked into eight fields;
     the first four build a ``fitz.Rect`` that is filed under the fifth field
     (the word). The result zips the rectangle lists of ``words`` in the
     order given, so it stops at the word with the fewest occurrences.
     """
     rects_by_word = defaultdict(list)
     for x0, y0, x1, y1, text, _f5, _f6, _f7 in page.getText("words"):
         rects_by_word[text].append(fitz.Rect(x0, y0, x1, y1))
     per_word_rects = [rects_by_word[wanted] for wanted in words]
     return zip(*per_word_rects)
Пример #5
0
  achieved by using Pillow / PIL instead of Tkinter's own support.

* We are not slowing down the speed of showing new images (i.a.w. "frames per
  second"). The statistics displayed at end of program can hence be used as a
  performance indicator.
"""

# Abort unless the PyMuPDF binding version is at least 1.14.5; the dotted
# version string is compared as a list of integers.
if not list(map(int, fitz.VersionBind.split("."))) >= [1, 14, 5]:
    raise SystemExit("need PyMuPDF v1.14.5 for this script")

mytime = time.perf_counter  # short alias for the performance counter

# define some global constants
gold = (1, 1, 0)  # colour triple -- presumably RGB components in 0..1, TODO confirm
blue = (0, 0, 1)  # colour triple -- presumably RGB components in 0..1, TODO confirm
pagerect = fitz.Rect(0, 0, 400, 400)  # dimension of our image

mp = fitz.Point(pagerect.width / 2.0,
                pagerect.height / 2.0)  # center of the page

r = fitz.Rect(mp, mp + (80, 80))  # rect of text box: 80 x 80, top-left corner at the page center

text = "Just some demo text, to be filled in a rect."

textpoint = fitz.Point(40, 50)  # start position of this text:
itext = "Rotation Morphing by:\nfitz.Matrix(%i)"  # %i is filled in by the code that uses this template


# ------------------------------------------------------------------------------
# make one page
# ------------------------------------------------------------------------------
Пример #6
0
import cv2
import fitz
import re
from Downloader import *
import sys

####    Download files from Eli

download = Downloader()
download.getAttach()

# Size of the rectangle that receives the header image.
w = 390
h = 125

words = ["Paciente"]
# Read the header image once; the context manager closes the file handle
# instead of leaving it open until garbage collection.
with open('Encabezado.jpg', "rb") as header_file:
    img = header_file.read()
rect = fitz.Rect(0, 0, w, h)
# PDFs to process.
docs = glob.glob('adjuntos/*.PDF')
for fname in docs:

    file_handle = fitz.open(fname)
    for pag in file_handle:
        pag.insertImage(rect, stream=img, keep_proportion=False)
        text = pag.getText("text")
        words = text.lower().split()
        try:
            name = ' '.join([
                str(elem) for elem in words[words.index('paciente') +
                                            2:words.index('resultado')]
            ])
        except Exception:
            name = ' '.join([
Пример #7
0
On every page after the first to-be-deleted page, also insert a link, which
points to this page.
The bookmark text equals the text on the page for easy verification.

Then delete some pages and verify:
- the new TOC has empty items exactly for every deleted page
- the remaining TOC items still point to the correct page
- the document has no more links at all
"""
import fitz

page_count = 100  # initial document length
r = range(5, 35, 5)  # contains page numbers we will delete: 5, 10, ..., 30
# insert this link on pages after first deleted one
link = {
    "from": fitz.Rect(100, 100, 120, 120),  # rectangle of the link on the page
    "kind": fitz.LINK_GOTO,
    "page": r[0],  # target: the first page that will be deleted
    "to": fitz.Point(100, 100),  # position on the target page
}


def test_deletion():
    # First prepare the document.
    doc = fitz.open()
    toc = []
    for i in range(page_count):
        page = doc.new_page()  # make a page
        page.insert_text((100, 100), "%i" % i)  # insert unique text
        if i > r[0]:  # insert a link
            page.insert_link(link)
Пример #8
0
def convert_to_pdf(Name, Date):
    """Render the current bill to a PDF, stamp the logo on it and launch it.

    The bill rows come from the module-level ``Bill`` list and ``total``;
    shop details, tax labels and the running bill counter are read from
    ``details.json``. The HTML table is rendered through the
    ``Bill_Preview.html`` template, converted with ``pdfkit`` to
    ``Bill<Counts>.pdf``, the counter in ``details.json`` is incremented,
    ``IndustryLogo.jpg`` is placed on every page, and the file name is
    finally passed to ``system``.

    Args:
        Name: Object whose ``get()`` returns the customer name.
        Date: Object whose ``get()`` returns a ``(day, month, year)`` triple.

    Returns:
        None. Returns early, doing nothing, when either ``get()`` raises.
    """
    global Bill, total

    # Read both inputs once; give up quietly if either cannot deliver a
    # value. Only ordinary errors are swallowed, so KeyboardInterrupt and
    # SystemExit still propagate.
    try:
        Customer_name = Name.get()
        date_parts = Date.get()
    except Exception:
        return
    Date_day, Date_month, Date_year = date_parts

    with open("details.json", 'r') as f:
        data = json.load(f)

    Gross_total = total
    taxable_amount = round((Gross_total * TAX) / 100, 2)
    Net_total = round((Gross_total * (100 + TAX)) / 100, 2)

    # Collect the table row by row and join once at the end.
    rows = [
        '<table align=center>',
        '<tr><td colspan=3 height=100>' + data["Details"]
        + '</td><td colspan=2></td></tr>',
        '<tr><td>Name:</td><td colspan=2>' + str(Customer_name)
        + '</td><td>Date:</td><td>' + str(Date_day) + '/' + str(Date_month)
        + '/' + str(Date_year) + '</td></tr>',
        '<tr><td class="index">Index</td><td class="name">Name</td>'
        '<td class="price">Price</td><td class="qty">Qty</td>'
        '<td class="amount">Amount</td></tr>',
    ]
    # Each entry is indexed as (name, price, quantity, amount).
    for index, entry in enumerate(Bill, start=1):
        rows.append(
            '<tr><td class="index">' + str(index)
            + '</td><td class="name">' + entry[0]
            + '</td><td align="right" class="price">' + "{0:.2f}".format(entry[1])
            + '</td><td align="right" class="qty">' + str(entry[2])
            + '</td><td align="right" class="amount">' + "{0:.2f}".format(entry[3])
            + '</td></tr>'
        )

    file_name = "Bill" + data["Counts"] + ".pdf"
    env = Environment(loader=FileSystemLoader('.'))

    rows.append(
        '<tr class="gross-total"><td colspan=4>Gross Total</td>'
        '<td align="right">' + "{0:.2f}".format(Gross_total) + '</td></tr>'
    )
    rows.append(
        '<tr class="taxable-amount"><td colspan=4>Taxable Amount\n( '
        + data["TAX_NAME"] + ': ' + data["TAX"]
        + '%)</td><td align="right">' + "{0:.2f}".format(taxable_amount)
        + '</td></tr>'
    )
    rows.append(
        '<tr class="net-total"><td colspan=4>Net Total</td>'
        '<td align="right">' + "{0:.2f}".format(Net_total) + '</td></tr>'
    )
    rows.append('</table>')
    Bill_html = "".join(rows)

    template = env.get_template("Bill_Preview.html")
    template_vars = {
        "Bill": Bill_html,
    }
    html_out = template.render(template_vars)
    pdfkit.from_string(html_out, file_name)

    # Advance the persistent bill counter for the next bill.
    data["Counts"] = str(int(data["Counts"]) + 1)
    with open("details.json", "w") as f:
        json.dump(data, f)

    # Stamp the logo on every page and save incrementally; always release
    # the document handle, even if stamping fails.
    doc = fitz.open(file_name)
    try:
        rect = fitz.Rect(358, 15, 428, 118)
        for page in doc:
            page._cleanContents()
            page.insertImage(rect, filename="IndustryLogo.jpg", overlay=True)
        doc.saveIncr()
    finally:
        doc.close()
    # NOTE(review): `system` is not defined in this block (presumably
    # os.system); the file name is handed to it unquoted -- confirm that
    # data["Counts"] can never contain shell metacharacters.
    system(file_name)
Пример #9
0
def render_box(bounding_box, pdf):
    """Render the region described by ``bounding_box`` as a pixmap.

    The page is looked up as ``bounding_box.page_num - 1`` and the clip
    rectangle is built from ``min_h``, ``min_v``, ``max_h`` and ``max_v``.
    """
    target_page = pdf.loadPage(int(bounding_box.page_num) - 1)
    clip_rect = fitz.Rect(
        bounding_box.min_h,
        bounding_box.min_v,
        bounding_box.max_h,
        bounding_box.max_v,
    )
    return target_page.getPixmap(clip=clip_rect)
Пример #10
0
 def bbox(self):
     """Return the rectangle from the first cell's top-left to the last cell's bottom-right.

     A default-constructed ``fitz.Rect`` is returned when ``self.cells`` is empty.
     """
     if not self.cells:
         return fitz.Rect()
     left, top = self.cells[0][0].bbox.tl
     right, bottom = self.cells[-1][-1].bbox.br
     return fitz.Rect(left, top, right, bottom)
Пример #11
0
    def get_financial_resume(self):
        """Extract the financial summary values of the brokerage notes.

        For every page that contains "Resumo Financeiro" (or, failing that,
        "Corretagem / Despesas") a search rectangle is built with the text
        search function, the words of ``self.words[pno]`` intersecting it are
        grouped into lines by their bottom coordinate, and for each line that
        starts with one of the known headers the amount found on the
        *previous* line is stored under that header.

        Afterwards the collected values are copied into ``self.ncs_in_file``
        for each of the ``self.nc_quantity`` notes, "Custos Totais" is derived
        from the absolute difference between two of them, a report is printed,
        and the transaction extractors are run.

        Returns:
            dict: header -> list of collected values; also stored in
            ``self.financial_resume``.
        """
        headers = ["Valor líquido das operações", "Taxa de liquidação", "Taxa de Registro", 
                "Total CBLC", "Taxa de termo/opções", "Taxa A.N.A", "Emolumentos",
                 "Total Bovespa / Soma", "Corretagem", "ISS", 
                 "I.R.R.F.", "Outras Bovespa", "Total Corretagem / Despesas", "Líquido para"]
        my_financial_resume = {}
        for pno, page in enumerate(self.pages):
            rl1 = page.searchFor("Resumo Financeiro") 
            if not rl1:
                rl1 = page.searchFor("Corretagem / Despesas") # Are we on the other page??
                if not rl1:
                    continue # Neither marker found: skip this page
            rl2 = page.searchFor("Líquido para ")       # rect list two
            if rl2:
                # The indexed literal evaluates to (601, 842); the hit is united with it.
                rl2 = [rl2[0] | [(601,842),(0,0)][0]]
            else:
                # NOTE(review): here rl2[0] is the plain tuple (601, 842), which is
                # then united with rl1[0] below -- confirm this is the intended fallback.
                rl2 = [(601,842),(0,0)]

            rect = rl1[0] | rl2[0]
    
            # Now we have the rectangle ---------------------------------------------------
            ###### 
            # select the words which at least intersect the rect
            #------------------------------------------------------------------------------
            mywords = [w for w in self.words[pno] if fitz.Rect(w[:4]).intersects(rect)]
            # Sort by bottom coordinate, then left coordinate, so groupby sees each line once.
            mywords.sort(key = itemgetter(3, 0))
            group = groupby(mywords, key = itemgetter(3))
            old = ""  # amount matched on the previous line
            for y1, gwords in group:
                
                line = " ".join(w[4] for w in gwords)
                
                for header in headers:
                    # Did we find the header at the start of the line?
                    if line[0:len(header)] == header:
                        # Is there already an entry for this header?
                        if header in my_financial_resume.keys():
                            # Append `old`, since the value appears before the label (header)
                            my_financial_resume[header].append(old)
                        else:
                            # Create the value in a list.
                            my_financial_resume[header] = [old] 
                # Match a money amount of the form XX.XXX,XX Y (Y = C or D)
                old = re.findall(r"(?:[1-9]\d{0,2}(?:\.\d{3})*|0)(?:,\d{1,2})[ ][CD]{1}", line)
                if old:
                    old = old[0]
                else:
                    old = 0
                    
        vl, vc = 0,0
        print("Total de Notas de Corretagem no Arquivo:", self.nc_quantity)
        for nc in range(self.nc_quantity):
            # try:
            # Remove 'falsy' items...
            try:
                my_financial_resume["Corretagem"] = [x for x in my_financial_resume["Corretagem"] if x] 
            except:
                # No "Corretagem" entry was collected: skip this note entirely.
                continue
            for head in headers:
                if head in my_financial_resume.keys() and len(my_financial_resume[head]) > 1:
                    self.ncs_in_file[nc][head] =  my_financial_resume[head][nc]
                else:
                    # NOTE(review): this branch is also reached when `head` is missing
                    # from my_financial_resume, in which case the lookup raises KeyError.
                    self.ncs_in_file[nc][head] =  my_financial_resume[head][0]
                # my_financial_resume["Custos Totais"][nc] = float(my_financial_resume[headers[-1]][nc].split(" ")[0]) - float(my_financial_resume[headers[0][nc]].split(" ")[0])
                vl = to_money(self.ncs_in_file[nc]["Valor líquido das operações"])
                if "Líquido para" in self.ncs_in_file[nc].keys():
                    vc = to_money( self.ncs_in_file[nc]["Líquido para"])
                else:
                    continue
            # Total costs: absolute difference between the two amounts read above.
            if "Custos Totais" in my_financial_resume.keys():
                my_financial_resume["Custos Totais"].append(locale.currency( abs(vc - vl), grouping = True )) 
            else:
                my_financial_resume["Custos Totais"] = [locale.currency( abs(vc - vl), grouping = True )]
            print("Nota de Corretagem Nº:", self.ncs_in_file[nc]["Nota"])
            print("Código do Cliente:", self.ncs_in_file[nc]["CodigoCliente"])
            print("Data da Nota:", self.ncs_in_file[nc]["Data"])
            for head in headers:
                # Headers starting with "Total" get an extra tab in the report.
                tab = "\t" if head[0:5] == "Total" else ""
                if head in my_financial_resume.keys():
                    if len(my_financial_resume[head]) > 1:
                        print("\t {} {}: {}".format(tab, head,my_financial_resume[head][nc]))
                    else:
                        print("\t {} {}: {}".format(tab, head,my_financial_resume[head]))
                else:
                    # NOTE(review): the message reads "supported" where "unsupported" was
                    # probably meant; input() then blocks until the user presses Enter.
                    print("This Brokeage note seems to be a supported kind! Day trade may be? Missing: {}".format(head))
                    input()
            print("\nResumo:")
            if "Valor líquido das operações" in self.ncs_in_file[nc].keys():
                print("\t Valor Líquido das Operações:", self.ncs_in_file[nc]["Valor líquido das operações"])
                print("\t Valor da Nota de Corretagem:", self.ncs_in_file[nc]["Líquido para"])
                print("\t Custos Totais:", my_financial_resume["Custos Totais"][nc])
            else:
                continue
            print("Conta: ", self.get_account())
        for nc in range(self.nc_quantity):
            print("Nota de Corretagem Nº:", self.ncs_in_file[nc]["Nota"])
        # NOTE(review): printed unconditionally, even when a summary was found.
        print("No Financial Resume!")
        print(" -------- Negociações -------- " )

        # Known issue: the financial summary printout is duplicated because the second NC is not ignored.
        # file:///Users/maion/OneDrive/Documentos/Documentos%20Felipe/programs/ruby/Python/PyCharmProjects/IR/Notas%20Corretagem/pdf/240303-20130829-NC4617559-929443.pdf 
        self.get_transactions()
        self.get_transactions_for_options()
        # TODO implement this..
        # self.get_transactions_for_index()

        # print(*[(w[1], w[0].split(" ")) for w in self.negotiations], sep="\n")
        print(" ______________________________")
        # except:
            # print("Conta: ", self.get_account())
            # for nc in range(self.nc_quantity):
            #     print("Nota de Corretagem Nº:", self.ncs_in_file[nc]["Nota"])
            # print("Day Trade - To be implemented!")
        self.financial_resume = my_financial_resume
        return self.financial_resume
Пример #12
0
 def bbox(self):
     """Return the union of the ``bbox`` of every item in ``self._instances``.

     The accumulation starts from a default-constructed ``fitz.Rect`` and is
     returned unchanged when there are no instances.
     """
     union = fitz.Rect()
     for item in self._instances:
         # NOTE: | supports fitz.Rect and rect-like objects, e.g. tuple
         union = union | item.bbox
     return union
Пример #13
0
# Colour triples used by the drawing code below
# (presumably RGB components in the 0..1 range -- TODO confirm).
blue = (0, 0, 1)
green = (0, 1, 0)
gray = (0.9, 0.9, 0.9)

for page1 in doc1:
    blks = page1.getTextBlocks(images=True)  # read text blocks of input page
    # create new page in output with /MediaBox dimensions
    page2 = doc2.newPage(-1,
                         width=page1.MediaBoxSize[0],
                         height=page1.MediaBoxSize[1])
    # the text font we use
    page2.insertFont(fontfile=None, fontname="Helvetica")
    img = page2.newShape()  # prepare /Contents object

    # calculate /CropBox & displacement
    disp = fitz.Rect(page1.CropBoxPosition, page1.CropBoxPosition)
    croprect = page1.rect + disp

    # draw original /CropBox rectangle
    img.drawRect(croprect)
    img.finish(color=gray, fill=gray)

    for b in blks:  # loop through the blocks
        r = fitz.Rect(b[:4])  # block rectangle
        # add dislacement of original /CropBox
        r += disp
        img.drawRect(r)  # surround block rectangle

        if b[-1] == 1:  # if image block ...
            color = red
            a = fitz.TEXT_ALIGN_CENTER
Пример #14
0
     imgcount += 1
     new_name = "图片{}.png".format(imgcount)  # 生成图片的名称
     pix_temp.writeImage(
         os.path.join(root, '\EasyTrans', 'trans', 'output_file',
                      new_name))
     # bytes_array = pix_temp.getImageData('png')#可以不输出图片再写入新的pdf,通过byte
     # print(pix_temp.getImageData('png'))
     pix_temp = None  # 释放资源
 blks = cur_page.getTextBlocks(
     images=True)  # read text blocks of input page
 new_page = new_pdf.newPage(
     -1,
     width=cur_page.MediaBoxSize[0],
     height=cur_page.MediaBoxSize[1])  # 创建一个新的页面与之前的页面相同大小
 img = new_page.newShape()  # prepare /Contents object
 disp = fitz.Rect(cur_page.CropBoxPosition, cur_page.CropBoxPosition)
 croprect = cur_page.rect + disp
 # img.drawRect(croprect)#画出整个页面的矩形
 # img.finish(color=gray, fill=gray)#填充颜色
 begin = (0, 0, 0, 0)  # 记录初始值
 end = (0, 0, 0, 0)  # 记录终结值
 flag = 0  # 记录当前的循
 reference_flag = 0  # 判断是否在参考文献之后
 blks.append((1, 2, 3, 6))
 content = ""
 imgcount = 0
 fonts = 9
 for num in range(len(blks)):  # loop through the blocks
     # 如果是本页面最后一个块,直接结束,因为最后一个是方便计算自己添加的。
     if num == len(blks) - 1:
         break
Пример #15
0
import fitz
import pandas as pd
doc = fitz.open('<Your File Name>.pdf')
page1 = doc[0]
words = page1.get_text("words")

#Extract the coordinates of the first object :
first_annots=[]
rec=page1.first_annot.rect

rec

#Information of words in first object is stored in mywords
mywords = [w for w in words if fitz.Rect(w[:4]) in rec]
ann= make_text(mywords)
first_annots.append(ann)

# This function selects the words contained in the box, sorts them, and returns them as a single string:

def make_text(words):
    line_dict = {}
    words.sort(key=lambda w: w[0])

    for w in words:
        y1 = round(w[3], 1)
        word = w[4]
        line = line_dict.get(y1, [])
        line.append(word)

# Extract each page of the document and all of its annotations/rectangles:
  for pageno in range(0,len(doc)-1):
Пример #16
0
# Evan Widloski - 2020-11-07
# demonstrating storing pen strokes directly in PDF

import fitz

# optional-content layer that will hold the pen strokes
stroke_layer_name = 'remarkable pen'

# start from an empty PDF with one page whose rotation is set to 0
doc = fitz.open()
page = doc.newPage()
page.setRotation(0)

# a small free-text annotation, just for decoration
page.addFreetextAnnot(fitz.Rect(72, 72, 100, 100), 'foobar')

# ----- write pen strokes -----

# every stroke is a list of (x, y) points
zigzag_stroke = [(72, 72), (100, 100), (200, 100), (100, 200)]
arc_stroke = [
    (372, 300), (367, 324), (355, 346), (336, 362), (312, 370),
    (287, 370), (264, 362), (244, 346), (232, 324), (228, 300),
]

# register the optional content group first, then attach the ink
# annotation carrying both strokes to it
pen_ocg_xref = doc.addOCG(stroke_layer_name, config=-1, on=1, intent=None)
ink = page.addInkAnnot([zigzag_stroke, arc_stroke])
ink.setOC(pen_ocg_xref)
print(pen_ocg_xref)

doc.save('out.pdf', deflate=True)
Пример #17
0
def extract_rectangle_text(rect: fitz.Rect, wordlist):
    """Return the text of all words whose bounding box intersects *rect*.

    Each entry of *wordlist* is indexed as ``entry[:4]`` for the box
    coordinates and ``entry[4]`` for the word string. Matching words are
    joined with single spaces in their original order, and surrounding
    whitespace is stripped from the result.
    """
    hits = []
    for entry in wordlist:
        if fitz.Rect(entry[:4]).intersects(rect):
            hits.append(entry[4])
    return " ".join(hits).strip()
Пример #18
0
def pencil(page, penciltip, pb_height, left):
    """Draw a pencil picture on *page* using its draw* methods.

    Args:
        page: object providing drawRect, drawPolyline, drawLine, drawBezier,
            drawCurve and insertTextbox (presumably a fitz.Page -- confirm
            against the caller).
        penciltip: position of the pencil tip; it is combined with
            fitz.Point values and its ``.x`` / ``.y`` are read.
        pb_height: height of the pencil body; every other dimension
            (line width, body width, font size, ...) is derived from it.
        left: truthy if the tip points to the left, i.e. the body extends
            to the right of *penciltip*; falsy for the mirrored pencil.

    Returns:
        None.

    Raises:
        ValueError: if insertTextbox reports (by a negative return value)
            that the "No.2" label does not fit into its rectangle.
    """
    from fitz.utils import getColor
    # define some colors
    yellow = getColor("darkgoldenrod")
    black = getColor("black")
    white = getColor("white")
    red = getColor("red")
    wood = getColor("wheat2")
    wood2 = getColor("wheat3")
    #---------------------------------------------------------------------------
    # some adjustments follow depending on pencil tip is left or right:
    # when choices need be made between a left point (lp) or a right point (rp),
    # we specify lp*a + rp*b, delivering either lp or rp.
    # Variable 's' is used as a sign and is either +1 or -1.
    #---------------------------------------------------------------------------
    if left:  # pencil tip is left
        a = 1
        b = 0
        s = 1
    else:
        a = 0
        b = 1
        s = -1

    def oneof(x, y):
        # x if the tip is left (a=1, b=0), else y (a=0, b=1)
        return x * a + y * b

    w = pb_height * 0.01  # standard line thickness
    pb_width = 2 * pb_height  # pencil body width
    # the two body corners adjacent to the tip, one pb_height away from it
    tipendtop = penciltip + fitz.Point(1, -0.5) * pb_height * s
    tipendbot = penciltip + fitz.Point(1, 0.5) * pb_height * s
    r = fitz.Rect(tipendtop, tipendbot + (pb_width * s, 0))  # pencil body
    r.normalize()  # force r to be finite
    # topline / botline indicate the pencil edges
    topline0 = fitz.Point(r.x0 + r.width * 0.1,
                          r.y0 + pb_height / 5.)  # upper pencil edge - left
    topline1 = fitz.Point(r.x0 + r.width * 0.9,
                          topline0.y)  # upper pencil edge - right
    botline0 = fitz.Point(r.x0 + r.width * 0.1,
                          r.y1 - pb_height / 5.)  # lower pencil edge - left
    botline1 = fitz.Point(r.x0 + r.width * 0.9,
                          botline0.y)  # lower pencil edge - right

    # control point 1 for pencil rubber
    hp1 = oneof(r.top_right, r.top_left) + (pb_height * 0.6 * s, 0)
    # control point 2 for pencil rubber
    hp2 = oneof(r.bottom_right, r.bottom_left) + (pb_height * 0.6 * s, 0)
    # pencil body is some type of yellow
    page.drawRect(r, fill=yellow, color=wood, width=w)
    # wood-coloured trapezoids at the left and the right end of the body
    page.drawPolyline((r.top_left, topline0, botline0, r.bottom_left),
                      fill=wood,
                      color=yellow,
                      width=w)
    page.drawPolyline((r.top_right, topline1, botline1, r.bottom_right),
                      fill=wood,
                      color=yellow,
                      width=w)
    # draw pencil edge lines
    page.drawLine(topline0, topline1, width=w, color=wood2)
    page.drawLine(botline0, botline1, width=w, color=wood2)

    #===========================================================================
    # draw the pencil rubber
    #===========================================================================
    # the rubber sits on the body side opposite to the tip
    page.drawBezier(oneof(r.top_right, r.top_left),
                    hp1,
                    hp2,
                    oneof(r.bottom_right, r.bottom_left),
                    fill=red,
                    width=w)
    #===========================================================================
    # black rectangle near pencil rubber
    #===========================================================================
    # pb_height / 2 wide strip at the rubber end of the body
    blackrect = fitz.Rect(
        oneof((r.top_right - (pb_height / 2., 0)), r.top_left),
        oneof(r.bottom_right, (r.bottom_left + (pb_height / 2., 0))))
    page.drawRect(blackrect, fill=black, width=w)

    #===========================================================================
    # draw pencil tip and curves indicating pencil sharpening traces
    #===========================================================================
    page.drawPolyline((tipendtop, penciltip, tipendbot), width=w,
                      fill=wood)  # pencil tip
    p1 = oneof(r.top_left, r.top_right)  # either left or right
    p2 = oneof(topline0, topline1)
    p3 = oneof(botline0, botline1)
    p4 = oneof(r.bottom_left, r.bottom_right)
    p0 = -fitz.Point(pb_height / 5.,
                     0) * s  # horiz. displacement of ctrl points
    cp1 = p1 + (p2 - p1) * 0.5 + p0  # ctrl point upper rounding
    cp2 = p2 + (p3 - p2) * 0.5 + p0 * 2.9  # ctrl point middle rounding
    cp3 = p3 + (p4 - p3) * 0.5 + p0  # ctrl point lower rounding
    page.drawCurve(p1, cp1, p2, fill=yellow, width=w, color=wood)
    page.drawCurve(p2, cp2, p3, fill=yellow, width=w, color=wood)
    page.drawCurve(p3, cp3, p4, fill=yellow, width=w, color=wood)

    #===========================================================================
    # draw the pencil tip lead mine
    #===========================================================================
    # black triangle covering the first 40% of the tip, measured from penciltip
    page.drawPolyline(
        (penciltip + (tipendtop - penciltip) * 0.4, penciltip, penciltip +
         (tipendbot - penciltip) * 0.4),
        fill=black,
        width=w,
        closePath=True)
    #===========================================================================
    # add a curve to indicate lead mine is round
    #===========================================================================
    page.drawCurve(penciltip + (tipendtop - penciltip) * 0.4,
                   fitz.Point(penciltip.x + pb_height * 0.6 * s, penciltip.y),
                   penciltip + (tipendbot - penciltip) * 0.4,
                   width=w,
                   fill=black)

    #===========================================================================
    # re-border pencil body getting rid of some pesky pixels
    #===========================================================================
    page.drawLine(r.top_left, r.top_right, width=w)
    page.drawLine(r.bottom_left, r.bottom_right, width=w)
    page.drawPolyline((tipendtop, penciltip, tipendbot), width=w)
    #===========================================================================
    # draw pencil label - first a rounded rectangle
    #===========================================================================
    # p1 / p2 are insets from the edge lines; oneof() swaps them so that the
    # label is mirrored together with the pencil
    p1 = fitz.Point(0.65, 0.15) * pb_height
    p2 = fitz.Point(0.45, 0.15) * pb_height
    lblrect = fitz.Rect(topline0 + oneof(p1, p2), botline1 - oneof(p2, p1))
    page.drawRect(lblrect, width=w, fill=black)
    # black bulges on the right and left side round the label off
    page.drawCurve(lblrect.top_right,
                   fitz.Point(lblrect.x1 + pb_height / 4., penciltip.y),
                   lblrect.bottom_right,
                   width=w,
                   fill=black)
    page.drawCurve(lblrect.top_left,
                   fitz.Point(lblrect.x0 - pb_height / 4., penciltip.y),
                   lblrect.bottom_left,
                   width=w,
                   fill=black)
    # ... then text indicating it's a medium pencil
    # NOTE(review): align=1 is presumably fitz.TEXT_ALIGN_CENTER -- confirm
    if page.insertTextbox(lblrect,
                          "No.2",
                          color=white,
                          fontname="Helvetica",
                          fontsize=pb_height * 0.22,
                          align=1) < 0:
        raise ValueError("not enough space to store pencil text")

    #===========================================================================
    # finally the white vertical stripes - whatever they are good for
    #===========================================================================
    # two stripes at 1/3 and 2/3 of the black rectangle's width, each kept
    # pb_height / 20 away from its top and bottom edge
    p1t = blackrect.top_left + fitz.Point(blackrect.width / 3.,
                                          pb_height / 20.)
    p1b = blackrect.bottom_left + fitz.Point(blackrect.width / 3.,
                                             -pb_height / 20.)
    p2t = blackrect.top_left + fitz.Point(blackrect.width * 2 / 3.,
                                          pb_height / 20.)
    p2b = blackrect.bottom_left + fitz.Point(blackrect.width * 2 / 3.,
                                             -pb_height / 20.)
    page.drawLine(p1t,
                  p1b,
                  color=white,
                  width=pb_height * 0.08,
                  roundCap=False)
    page.drawLine(p2t,
                  p2b,
                  color=white,
                  width=pb_height * 0.08,
                  roundCap=False)
    return
Пример #19
0
def main(argv):
    """Stamp pictures onto a PDF and drop its leading pages.

    Options (parsed from *argv* with getopt):
        -h          print the usage line and exit
        -i NAME     input is NAME.pdf, output is NAME_vu.pdf
        -d N        remove the first N pages from the output
        -g N        put the big picture on the first N pages
        -p N        put the small picture on every page from index N on
        -l N        put the lateral picture on every page from index N on

    The picture paths come from the module-level names ``bigpicture``,
    ``smallpicture`` and ``lateralpicture``.

    Exits with status 2 on an unknown or malformed option.
    """
    usage = 'insert_rect.py -i inputfile -d x_first_pages_to_delete -g x_first_pages_with_big_pict -p small_pic_from_page_y -l lateral_page_from_page_z'

    input_file = ''
    output_file = ''
    nb_delete = 0
    nb_big = 0
    nb_small = 0
    nb_lateral = 0

    try:
        # "h" has to be declared, otherwise -h is rejected as an unknown
        # option and never reaches the help branch below
        opts, args = getopt.getopt(argv, "hi:d:g:p:l:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            input_file = arg + ".pdf"
            output_file = arg + "_vu.pdf"
        elif opt == "-d":
            nb_delete = int(arg)
            print("delete ", nb_delete, " first pages")
        # NOTE(review): --ofile has always been treated as an alias of -g
        # (it expects a page count, not a file name); kept for compatibility
        elif opt in ("-g", "--ofile"):
            nb_big = int(arg)
            print("big picture on ", nb_big, " first pages")
        elif opt == "-p":
            nb_small = int(arg)
            print("small picture from page ", nb_small)
        elif opt == "-l":
            nb_lateral = int(arg)
            print("lateral picture from page ", nb_lateral)

    print('Input file is ', input_file)
    print('Output file is ', output_file)

    file_handle = fitz.open(input_file)
    try:
        page_count = file_handle.pageCount
        # (picture path, target rectangle, first page index, stop page index)
        stamps = (
            (bigpicture, fitz.Rect(0, -80, 750, 210), 0, nb_big),
            (smallpicture, fitz.Rect(0, 0, 400, 35), nb_small, page_count),
            (lateralpicture, fitz.Rect(510, -50, 720, 850), nb_lateral,
             page_count),
        )
        for picture, image_rectangle, start, stop in stamps:
            # read the picture once and close the file right away
            with open(picture, 'rb') as stream:
                image_file = stream.read()
            for page in file_handle.pages(start, stop, 1):
                page.insertImage(image_rectangle, stream=image_file)

        # keep pages nb_delete..end, i.e. delete the first nb_delete pages
        file_handle.select(list(range(nb_delete, page_count)))
        file_handle.save(output_file, garbage=3)  # save and clean new PDF
    finally:
        # release the document even if stamping or saving failed
        file_handle.close()
Пример #20
0
def compare_layput(filename_source,
                   filename_target,
                   filename_output,
                   threshold=0.7):
    ''' Compare the layout of two pdf files word by word.

        It's difficult to have an exactly same layout of blocks, but ensure they
        look like each other. So all words with bbox information, as returned
        by `getText('words')`, are compared:

        ```
        (x0, y0, x1, y1, "word", block_no, line_no, word_no)
        ```

        For every compared pair both boxes are drawn onto the source page
        (source position in yellow, target position in red) and the annotated
        source document is saved to `filename_output`.

        Args:
            filename_source: path of the reference pdf.
            filename_target: path of the pdf under test.
            filename_output: path the annotated source pdf is written to.
            threshold: passed to `Element.get_main_bbox()` when matching boxes.

        Returns:
            False if the page counts differ or any word pair fails the bbox
            check, True otherwise. No output file is written when the page
            counts differ.
    '''
    def word_key(item):
        # sort by word text first, then by rounded y0 and x0
        return (item[4], round(item[1], 1), round(item[0], 1))

    # fitz document
    source = fitz.open(filename_source)  # type: fitz.Document
    try:
        target = fitz.open(filename_target)  # type: fitz.Document
        try:
            # check count of pages
            # --------------------------
            if len(source) != len(target):
                msg = 'Page count is inconsistent with source file.'
                print(msg)
                return False  # both documents are closed by the finally blocks

            flag = True
            errs = []
            for source_page, target_page in zip(source, target):

                # check position of each word
                # ---------------------------
                source_words = source_page.getText('words')
                target_words = target_page.getText('words')

                # sort by word
                source_words.sort(key=word_key)
                target_words.sort(key=word_key)

                if len(source_words) != len(target_words):
                    msg = 'Words count is inconsistent with source file.'
                    print(msg)

                # check each word and bbox; zip() stops at the shorter list
                for sample, test in zip(source_words, target_words):
                    source_rect = fitz.Rect(sample[0:4])
                    target_rect = fitz.Rect(test[0:4])

                    # draw bbox based on source layout
                    source_page.drawRect(source_rect,
                                         color=(1, 1, 0),
                                         overlay=True)  # source position
                    source_page.drawRect(target_rect,
                                         color=(1, 0, 0),
                                         overlay=True)  # current position

                    # check bbox word by word: ignore small bbox, e.g. single letter bbox
                    if not Element().update_bbox(source_rect).get_main_bbox(
                            target_rect, threshold):
                        flag = False
                        errs.append((f'{sample[4]} ===> {test[4]}',
                                     target_rect, source_rect))

            # save the annotated source document
            source.save(filename_output)
        finally:
            target.close()
    finally:
        source.close()

    # outputs
    for word, target_rect, source_rect in errs:
        print(
            f'Word "{word}": \nsample bbox: {source_rect}\ncurrent bbox: {target_rect}\n'
        )

    return flag
Пример #21
0
import fitz                          # <-- PyMuPDF
# open the PDF that receives the image
pdf = fitz.open("some.pdf")
# target area for the image: a 100 x 100 box at the upper left corner
image_box = fitz.Rect(0, 0, 100, 100)

# put the same image on every page
for pdf_page in pdf:
    pdf_page.insertImage(image_box, filename="some.image")

# do an incremental save
pdf.saveIncr()
    def get_financial_resume(self):
        """
        Collect the financial summary values of the brokerage notes in the file.

        For every page the summary rectangle is identified with the text
        search function: it spans from the first hit of "Resumo Financeiro"
        (or, failing that, "Corretagem / Despesas") to the point (601, 842),
        extended by the first hit of "Líquido para " when there is one.
        Pages without either heading are skipped.

        The words intersecting that rectangle are grouped into lines by their
        y1 coordinate. Whenever a line starts with one of the known headers,
        the money value found on the *previous* line is stored under that
        header, because the value appears before its label.

        The result is stored in ``self.financial_resume`` and returned as a
        dict mapping each header to the list of values found, one per note.
        The values are also copied into ``self.ncs_in_file`` and printed; if
        that fails for a note, a "Day Trade" placeholder message is printed
        instead.
        """
        headers = [
            "Valor líquido das operações", "Taxa de liquidação",
            "Taxa de Registro", "Total CBLC", "Taxa de termo/opções",
            "Taxa A.N.A", "Emolumentos", "Total Bovespa / Soma",
            "Corretagem", "ISS", "I.R.R.F.", "Outras Bovespa",
            "Total Corretagem / Despesas", "Líquido para"
        ]
        corner = (601, 842)  # point the summary rectangle always includes

        my_financial_resume = {}
        for pno, page in enumerate(self.pages):
            rl1 = page.searchFor("Resumo Financeiro")
            if not rl1:
                rl1 = page.searchFor(
                    "Corretagem / Despesas")  # Are we on the other page??
                if not rl1:
                    continue  # neither heading on this page
            rl2 = page.searchFor("Líquido para ")  # rect list two
            lower = rl2[0] | corner if rl2 else corner
            rect = rl1[0] | lower

            # Now we have the rectangle ---------------------------------------
            # select the words which at least intersect the rect
            mywords = [
                w for w in self.words[pno] if fitz.Rect(w[:4]).intersects(rect)
            ]
            mywords.sort(key=itemgetter(3, 0))
            old = ""  # money value seen on the previous line
            for y1, gwords in groupby(mywords, key=itemgetter(3)):
                line = " ".join(w[4] for w in gwords)
                for header in headers:
                    # Did we find the header at the start of the line?
                    if line.startswith(header):
                        # 'old' belongs to this header since the value
                        # appears before the label
                        my_financial_resume.setdefault(header,
                                                       []).append(old)
                # Money format: XX.XXX,XX Y (Y = C or D)
                found = re.findall(
                    r"(?:[1-9]\d{0,2}(?:\.\d{3})*|0)(?:,\d{1,2})[ ][CD]{1}",
                    line)
                old = found[0] if found else 0

        print("Total de Notas de Corretagem no Arquivo:", self.nc_quantity)
        for nc in range(self.nc_quantity):
            try:
                # Remove 'falsy' items...
                my_financial_resume["Corretagem"] = [
                    x for x in my_financial_resume["Corretagem"] if x
                ]
                for head in headers:
                    self.ncs_in_file[nc][head] = my_financial_resume[head][nc]
                print("Nota de Corretagem Nº:", self.ncs_in_file[nc]["Nota"])
                print("Código do Cliente:",
                      self.ncs_in_file[nc]["CodigoCliente"])
                print("Data da Nota:", self.ncs_in_file[nc]["Data"])
                for head in headers:
                    tab = "\t" if head[0:5] == "Total" else ""

                    print("\t {} {}: {}".format(tab, head,
                                                my_financial_resume[head][nc]))
                print("\nResumo:")
                print("\t Valor Líquido das Operações:",
                      self.ncs_in_file[nc]["Valor líquido das operações"])
                print("\t Valor da Nota de Corretagem",
                      self.ncs_in_file[nc]["Líquido para"])
                print(" ")
            except Exception:
                # Deliberate best effort: a note whose summary could not be
                # matched (missing header or value) is reported, not fatal.
                # Unlike a bare except, Ctrl-C and sys.exit() still propagate.
                print("Conta: ", self.get_account())
                for other in range(self.nc_quantity):
                    print("Nota de Corretagem Nº:",
                          self.ncs_in_file[other]["Nota"])
                print("Day Trade - To be implemented!")
        self.financial_resume = my_financial_resume
        return self.financial_resume
Пример #23
0
page.insertText(fitz.Point(ts_h - 30, ts_v - 30),
                stitle,
                fontsize=13,
                color=blue)

# draw the table data
for i, c in enumerate(table):
    beta = c[0] / seats * 180  # express seats as semi circle angles
    color = getColor(c[1])  # avoid multiple color lookups
    # the method delivers point of other end of the constructed arc
    # we will use it as input for next round
    point = page.drawSector(center,
                            point,
                            beta,
                            color=white,
                            fullSector=True,
                            fill=color)

    text = "%s, %i %s" % (c[2], c[0], "Sitze" if c[0] > 1 else "Sitz")
    pos = fitz.Point(ts_h, ts_v + i * lineheight)
    page.insertText(pos, text, color=blue)
    tl = fitz.Point(pos.x - 30, ts_v - 10 + i * lineheight)
    br = fitz.Point(pos.x - 10, ts_v + i * lineheight)
    rect = fitz.Rect(tl, br)  # legend color bar
    page.drawRect(rect, fill=color, color=color)

# overlay center of circle with white to simulate an auditorium
page.drawCircle(center, radius - 70, color=white, fill=white)
doc.save("piechart2.pdf")
Пример #24
0
    return rects


doc = fitz.open()  # new PDF
page = doc.newPage()  # new page
shape = page.newShape()  # make a page draw area
opacity = 0.3  # all annotation use this opacity
tcol = (0, 0, 1)  # text color
gold = (1, 1, 0)  # highlight color
bg_color = "skyblue3"
background = fitz.utils.getColor(bg_color)  # background color
fname = "hebo"  # Helvetica Bold
fsize = 12  # generous font size
tl = page.rect.tl + (150, 100)
br = page.rect.br - (150, 62)
rect = fitz.Rect(tl, br)  # only use this area of the page

rects = table(  # define a table with 2 cells per blend mode
    rows=len(blend_modes),  # one row per blend mode
    cols=2,  # for the blend mode and its highlighted version
    rect=rect,  # inside this rectangle
)

# paint page background
# will provide better visibility of highlighted text
shape.drawRect(page.rect)
shape.finish(fill=background, color=background)

# fill the table
for i, bmode in enumerate(blend_modes):
    r = rects[i]  # contains 2 rectangles
# used for non-CJK characters. For CJK, the fallback is always used.
text = """This is a text of mixed languages to demonstrate MuPDF's text output capabilities.
Font used for the non-CJK characters: '%s', font size: %g, color: %s.
Euro: €, some special signs: |~°²³, general Latin: ñäöüßâ
Japan: 熊野三山本願所は、15世紀末以降における熊野三山(熊野本宮、熊野新宮
Greece: Στα ερείπια της πόλης, που ήταν ένα σημαντικό
Korea: 에듀롬은 하나의 계정으로 전 세계 고등교육 기관의 인터넷에 접속할
Russia: Ко времени восшествия на престол Якова I в значительной
China: 北京作为城市的历史可以追溯到3,000年前。西周初年,周武王封召公奭于燕國。
This longer text part checks, whether the very last line will not be justified either.""" % (
    font.name,
    fsize,
    blue,
)

fill_rect = fitz.Rect(72, 72, 372, 372)  #  keep above text in here
writer = fitz.TextWriter(page_rect, color=blue)  # start a text writer

writer.fillTextbox(  # fill in above text
    fill_rect,  # keep text inside this
    text,  # the text
    align=fitz.TEXT_ALIGN_JUSTIFY,  # alignment
    warn=True,  # keep going if too much text
    fontsize=fsize,
    font=font,
)

# write our results to the PDF page.
writer.writeText(page)

# To show what happened, draw the rectangles, etc.
Пример #26
0
print(fitz.__doc__)
# Compare the version numerically: as lists of strings the comparison is
# lexicographic ("9" > "17"), which would wrongly accept e.g. v1.9.0.
if tuple(int(part) for part in fitz.VersionBind.split(".")[:3]) < (1, 17, 0):
    sys.exit("PyMuPDF v1.17.0+ is needed.")

gc.set_debug(gc.DEBUG_UNCOLLECTABLE)

highlight = "this text is highlighted"
underline = "this text is underlined"
strikeout = "this text is striked out"
squiggled = "this text is zigzag-underlined"
red = (1, 0, 0)
blue = (0, 0, 1)
gold = (1, 1, 0)
green = (0, 1, 0)

displ = fitz.Rect(0, 50, 0, 50)
r = fitz.Rect(72, 72, 220, 100)
t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!"


def print_descr(annot):
    """Write "<type> annotation" in red next to the annotation's rectangle.

    The text is inserted on the annotation's parent, at the bottom-right
    corner of the annotation rectangle shifted by (10, -5).
    """
    anchor = annot.rect.br + (10, -5)
    label = "%s annotation" % annot.type[1]
    annot.parent.insert_text(anchor, label, color=red)


doc = fitz.open()
page = doc.new_page()

page.set_rotation(0)
Пример #27
0
 def wxRect_to_Rect(self, wr):
     """Convert a wx.Rect into a fitz.Rect multiplied by ``self.shrink``."""
     left, top = wr.x, wr.y
     right = left + wr.width
     bottom = top + wr.height
     return fitz.Rect(left, top, right, bottom) * self.shrink
Пример #28
0
def generate(args, page):
    pagerect = list()
    # rl1 = page.searchFor("#")[0]
    rl1 = fitz.Rect(100, 30, 130, 70)  # sometimes in text so hardcode
    # rl2 = page.searchFor("THIS BEER IS")[0]  # sometimes in text so hardcode
    rl2 = fitz.Rect(20, 182, 555, 197)  # sometimes in text so hardcode
    if (page.number % 2):
        rl2.x1 = 575
        rl2.x0 = 40
    pagerect.append(rl1 | rl2)  ### union rectangle 0 (header)
    rectdesc = fitz.Rect(10, 120, 410, 160)
    if (page.number % 2):
        rectdesc.x1 += 20
    # rl1 = page.searchFor("THIS BEER IS")[0]
    rl1 = fitz.Rect(40, 182, 180, 197)  # sometimes in text so hardcode
    rl2 = page.searchFor("BASICS")[0]
    rl3 = page.searchFor("METHOD / TIMINGS")[
        0]  # sometimes food and method reversed
    method2 = 0
    if (rl3.x0 > 150):  #method is in 2nd row
        rl3 = page.searchFor("FOOD PAIRING")[0]
        method2 = 1
    rl2.x1 = 180
    rl3.x1 = 180
    if (page.number % 2):
        rl2.x1 = 200
        rl3.x1 = 200
    pagerect.extend((rl1 | rl2, rl2 | rl3))  ### union rectangle 1 and 2
    rl3.y1 = 780
    pagerect.append(rl3)  ### rect 3

    rl1 = page.searchFor("INGREDIENTS")[0]
    if rl1.y0 > 190:
        rl1 = page.searchFor("INGREDIENTS")[1]
    rl2 = page.searchFor("FOOD PAIRING")
    if method2:
        rl2 = page.searchFor(
            "METHOD / TIMINGS")  # sometimes food and method reversed
    if rl2 == []:
        rl2 = (rl1, )
        rl2[0].y1 = 780
    rl2 = rl2[0]
    if rl2.x0 > 300:  #food pairing is sometimes on third column
        special = 1
        rl1.y1 = 780
        rl1.x1 = 340
        if (page.number % 2):
            rl1.x1 = 360
        pagerect.append(rl1)  # rect4 if pairing next
        rl2.x1 = 555
        if (page.number % 2):
            rl2.x1 = 575
        rl2.y1 = 780
        pagerect.append(rl2)  # rect5 if pairing next
    else:
        special = 0
        rl2.x1 = 340
        if (page.number % 2):
            rl2.x1 = 360
        pagerect.append(rl1 | rl2)  ### union rectangle 4
        rl2.y1 = 780
        pagerect.append(rl2)  ### rect 5
    if method2:
        pagerect[3], pagerect[5] = pagerect[5], pagerect[3]

    rl1 = page.searchFor("PACKAGING")[0]
    rl2 = page.searchFor("BREWER’S TIP")[0]
    rl2.x1 = 555
    if (page.number % 2):
        rl2.x1 = 575
    rlt = rl1 | rl2
    rlt.y0 += 20  # to crop for photo
    rlt.y1 -= 35
    rlt.x1 -= 5
    pagerect.append(rlt)  ### union rectangle 6
    if special:
        rlr = page.searchFor("FOOD PAIRING")[0]
        pagerect.append(rlr | rl2)  ### union rectangle 7 special
    else:
        rl2.y1 = 780
        pagerect.append(rl2)  ### rect 7

    # FOR TESTING RECTANGLES :
    if args.debug:
        for rect in pagerect:
            page.drawRect(rect, color=(1, 0, 0), width=2)
        page.drawRect(rectdesc, color=(0, 1, 0), width=2)
        page.getPixmap().writeImage("page-%i-test.png" % page.number)

    words = page.getTextWords()
    blocks = page.getTextBlocks()
    beer = {}
    for i, rect in enumerate(pagerect):
        myblocks = [w for w in blocks if fitz.Rect(w[:4]).intersect(rect)]
        # myblocks = [w for w in blocks if fitz.Rect(w[:4]) in rect]
        groupblock = groupby(sorted(myblocks, key=itemgetter(3, 0)),
                             key=itemgetter(3))
        sentence_list_1 = [
            " ".join(w[4] for w in gwords) for y1, gwords in groupblock
        ]
        sentence_list_blk = [
            re.sub(r"(\s+)", r" ", s) for s in sentence_list_1
        ]
        blkstr = "\n".join(sentence_list_blk)
        blkstr2 = "\n".join(sentence_list_1)
        sentence_list_blk_2 = blkstr2.split("\n")

        if args.debug:
            # print(i,sentence_list_blk)
            print(i, sentence_list_blk)
            print(blkstr2)

        try:
            if i == 0:  #header
                descblock = [w for w in words if fitz.Rect(w[:4]) in rectdesc]
                descgroupblock = groupby(sorted(descblock,
                                                key=itemgetter(3, 0)),
                                         key=itemgetter(3))
                descsentence_list_1 = [
                    " ".join(w[4] for w in gwords)
                    for y1, gwords in descgroupblock
                ]

                beer['id'] = re_number.search(blkstr2).group(1)
                t = sentence_list_blk_2.index("#" + beer['id'])
                beer['name'] = sentence_list_blk_2[t + 1]

                if descsentence_list_1 != []:
                    beer['shortdesc'] = descsentence_list_1[0]
                else:
                    print(
                        f"HEADER order problem trying to correct: {page.number} id:{beer['id']} "
                    )
                    print(f"leaving empty")
                    # beer['shortdesc'] = sentence_list_blk_2[t+2]

                if s := re_date.search(blkstr2):
                    beer['date'] = s.group(1)
                else:
                    print(f"No date data: {page.number} id:{beer['id']} ")

                abvibuog = re_realabvog.search(blkstr2)
                if abvibuog:
                    beer['real_abv'] = abvibuog.group(1)
                    beer['IBU'] = abvibuog.group(2)
                    beer['OG'] = abvibuog.group(3)
                # elif "ABV" in sentence_list_blk_2[t+3]:
                # beer['real_abv'] = re.search(r"\d+\.{0,1}\d*%",sentence_list_blk_2[t+4]).group()
                elif "ABV" in blkstr2:
                    beer['real_abv'] = re.search(r"\d+\.{0,1}\d*%",
                                                 blkstr2).group()

            elif i == 1:  #top left description
                if "THIS BEER IS" in sentence_list_blk_2 and "BASICS" in sentence_list_blk_2:
                    t1 = sentence_list_blk_2.index("THIS BEER IS")
                    t2 = sentence_list_blk_2.index("BASICS")
                    beer['description'] = " ".join(sentence_list_blk_2[t1 +
                                                                       1:t2])
                else:
                    print(
                        f"No description data: {page.number} id:{beer['id']} ")

            elif i == 2:  #basics
                if s := re_vol.search(blkstr):
                    beer['vol'] = s.group(1)
Пример #29
0
if not path.exists(input_folder):
    print ("Não existe input")
    exit(-1)


if not path.exists(assinatura_folder):
    print ("Não existe assinatura")
    exit(-2)

jessica = assinatura_folder + "image.png"
rodrigo = assinatura_folder + "image.png"
marcelo = assinatura_folder + "image.png"

# define the posdition (upper-right corner)
image_jessica = fitz.Rect(30,320,250,632)
image_rodrigo = fitz.Rect(240,320,470,632)
image_marcelo = fitz.Rect(430,320,660,632)

files = glob(input_folder + "*.pdf")
output_file = "example2.pdf"
for input_file in files: 
    # retrieve the first page of the PDF
    file_handle = fitz.open(input_file)
    first_page = file_handle[0]
    output_file = output_folder + path.split(input_file)[1]
    # add the image
    first_page.insertImage(image_jessica, filename = jessica)
    first_page.insertImage(image_rodrigo, filename = rodrigo)
    first_page.insertImage(image_marcelo, filename = marcelo)
Пример #30
0
# Images stamped onto page 2 of the high / very-high risk reports.
_EMERGENCY_IMAGE = "D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg"
_ANNOTATION_IMAGE = "D:\\Ken related\\HazCom_students\\Project\\interface\\annotation.jpg"

_REPORT_TITLE = "HAZCOM REPORT FOR ROOF FALLS"
_BORDER_COLOR = "#8B4513"

# Per-designation layout data. ``None`` means the designation gets only the
# common lists and no designation-specific list.
#   precaution     -> index into Precaution_headings (low-risk report)
#   mitigation     -> index into Mitigation_headings (high / very-high reports)
#   items          -> number of designation-specific mitigation rows printed
#   high_font      -> font size of that list in the high-risk report
#   very_high_font -> font size of that list in the very-high-risk report
#   very_high_size -> figure size of page 1 of the very-high-risk report
_ROLE_LAYOUT = {
    "Overman": {
        "precaution": 3, "mitigation": 21, "items": 3,
        "high_font": 14, "very_high_font": 14, "very_high_size": (15, 10),
    },
    "Mining Sirdar": {
        "precaution": 5, "mitigation": 23, "items": 3,
        "high_font": 11, "very_high_font": 13, "very_high_size": (20, 10),
    },
    "Shotfirer": {
        "precaution": 7, "mitigation": 25, "items": 4,
        "high_font": 14, "very_high_font": 12, "very_high_size": (15, 10),
    },
    "Timberman": {
        "precaution": 9, "mitigation": 27, "items": 3,
        "high_font": 14, "very_high_font": 14, "very_high_size": (15, 10),
    },
    "Common Worker": None,
}


def _new_bordered_figure(figsize):
    """Create (and make current) a figure with the brown report border."""
    # NOTE(review): figures are not closed after saving. The original code had
    # ``plt.close()`` commented out; closing may interact with the host GUI
    # event loop on some backends -- confirm before adding it.
    return plt.figure(figsize=figsize, linewidth=10, edgecolor=_BORDER_COLOR)


def _start_title_page(figsize):
    """Create a bordered figure carrying the report title, with axes hidden."""
    border = _new_bordered_figure(figsize)
    plt.title(_REPORT_TITLE, fontsize=30, weight="bold")
    plt.axis('off')
    return border


def _save_page(pdf, border):
    """Append the current figure to *pdf*, keeping the border edge colour."""
    pdf.savefig(edgecolor=border.get_edgecolor())


def _annotate_numbered(column, count, top, step, fontsize, row_offset=0):
    """Write ``count`` numbered lines ("1) ...", "2) ...") on the current axes.

    Line ``k`` shows ``column[k + row_offset]`` at y = ``top - k * step``
    (axes points), x = -90.
    """
    for label in range(1, count + 1):
        plt.annotate(
            str(label) + ") " + column[label + row_offset],
            xy=(-90, top - label * step),
            xycoords='axes points',
            fontsize=fontsize,
        )


def _insert_images_on_second_page(pdf_path, placements):
    """Reopen *pdf_path* and place each ``(rect, image file)`` on page index 1."""
    doc = fitz.open(pdf_path)
    try:
        page = doc.loadPage(1)
        for rect, image_file in placements:
            page.insertImage(rect, filename=image_file)
        doc.saveIncr()
    finally:
        # Release the file handle even if insertion or saving fails.
        doc.close()


def _write_low_risk_report(pdf_path, role, header_args):
    """One page: general precautions plus the designation's own precautions."""
    with PdfPages(pdf_path) as pdf:
        border = _start_title_page((20, 10))
        headerInfoLow(*header_args)
        _annotate_numbered(Precaution[Precaution_headings[0]], 9, 400, 25, 12)
        if role is not None:
            _annotate_numbered(
                Precaution[Precaution_headings[role["precaution"]]],
                6, 140, 25, 12, row_offset=1)
        _save_page(pdf, border)


def _write_medium_risk_report(pdf_path, header_args):
    """One page: general precautions and two mitigation lists (all designations)."""
    with PdfPages(pdf_path) as pdf:
        border = _start_title_page((17, 10))
        headerInfoMed(*header_args)
        _annotate_numbered(Precaution[Precaution_headings[0]], 9, 400, 20, 10)
        _annotate_numbered(Mitigation[Mitigation_headings[0]], 5, 180, 20, 13)
        _annotate_numbered(Mitigation[Mitigation_headings[17]], 3, 50, 20, 13)
        _save_page(pdf, border)


def _write_high_risk_report(pdf_path, role, header_args):
    """Two pages: mitigation lists, then a blank bordered page with an image."""
    with PdfPages(pdf_path) as pdf:
        # Page 1
        border = _start_title_page((17, 10))
        headerInfoHigh(*header_args)
        _annotate_numbered(Mitigation[Mitigation_headings[0]], 5, 400, 25, 14)
        if role is not None:
            _annotate_numbered(
                Mitigation[Mitigation_headings[role["mitigation"]]],
                role["items"], 230, 25, role["high_font"], row_offset=1)
            closing_top = 100
        else:
            # Without a designation-specific list the closing list moves up.
            closing_top = 230
        _annotate_numbered(
            Mitigation[Mitigation_headings[17]], 3, closing_top, 25, 14)
        _save_page(pdf, border)
        # Page 2: empty bordered page that receives the image below.
        border = _new_bordered_figure((10, 11))
        plt.axis('off')
        _save_page(pdf, border)
    # The PDF must be finished before it is reopened to add the image.
    _insert_images_on_second_page(
        pdf_path, [(fitz.Rect(50, 50, 700, 700), _EMERGENCY_IMAGE)])


def _write_very_high_risk_report(pdf_path, role, header_args):
    """Two pages: mitigation lists, then the hazard report form with images."""
    figsize = (15, 10) if role is None else role["very_high_size"]
    with PdfPages(pdf_path) as pdf:
        # Page 1
        border = _start_title_page(figsize)
        headerInfoVeryHigh(*header_args)
        _annotate_numbered(Mitigation[Mitigation_headings[0]], 5, 400, 20, 14)
        _annotate_numbered(Mitigation[Mitigation_headings[3]], 4, 260, 20, 14)
        if role is not None:
            # Originally y = 160 - row * 20 with row = label + 1, which is
            # the same as 140 - label * 20.
            _annotate_numbered(
                Mitigation[Mitigation_headings[role["mitigation"]]],
                role["items"], 140, 20, role["very_high_font"], row_offset=1)
        _save_page(pdf, border)
        # Page 2
        border = _new_bordered_figure((15, 10))
        plt.annotate("HAZARD REPORT FORM", xy=(490, 570),
                     xycoords='axes points', fontsize=27, weight="bold")
        plt.axis('off')
        _save_page(pdf, border)
    # The PDF must be finished before it is reopened to add the images.
    _insert_images_on_second_page(
        pdf_path,
        [
            (fitz.Rect(-500, 30, 500, 650), _EMERGENCY_IMAGE),
            (fitz.Rect(500, 100, 1100, 580), _ANNOTATION_IMAGE),
        ])


def generate_report():
    """Build the HAZCOM roof-fall report PDF for the values entered in the form.

    Reads the name, designation and location from the ``ename``, ``desg_`` and
    ``elocation`` inputs (clearing the name and location fields), looks up the
    risk rating for that location in the last ``Panel_data`` column and writes
    ``<name>.pdf`` in the current working directory.

    The report layout depends on the risk rating band (< 28, < 48, < 70,
    otherwise) and on the designation; designations not listed in
    ``_ROLE_LAYOUT`` produce no file. For the two highest bands the finished
    PDF is reopened and images are inserted on its second page.

    Raises:
        IndexError: if the location field is empty.
        ValueError: if the last character of the location is not a digit, or
            the looked-up rating cannot be converted to ``int``.
    """
    name_ = ename.get()
    ename.delete(0, END)
    desg = desg_.get()
    loca_ = elocation.get()
    # Only the last character of the location text is used as its number.
    loca_ = int(loca_[-1])
    elocation.delete(0, END)
    risk_rating = int(Panel_data[Panel_data_headings[-1]][loca_ - 1])
    zone_ = zoneOfC(risk_rating)

    if desg in _ROLE_LAYOUT:
        role = _ROLE_LAYOUT[desg]
        pdf_path = name_ + '.pdf'
        header_args = (name_, desg, loca_, zone_)
        if risk_rating < 28:
            _write_low_risk_report(pdf_path, role, header_args)
        elif risk_rating < 48:
            _write_medium_risk_report(pdf_path, header_args)
        elif risk_rating < 70:
            _write_high_risk_report(pdf_path, role, header_args)
        else:
            # Very high risk
            _write_very_high_risk_report(pdf_path, role, header_args)