Python get_context示例

编程语言: Python

命名空间/包名称: docx2python.docx_context

方法/功能: get_context

hotexamples.com的示例: 6

Python get_context - 共找到 6 个示例。以下是从开源项目中提取的、评分最高的 docx2python.docx_context.get_context 实际用例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

    def parse(cls, filename: str) -> List[dict]:
        """Extract the non-empty paragraphs of a docx file as a list of records.

        Args:
            filename: Path of the docx (zip) archive to read.

        Returns:
            One dict per paragraph that yields any non-blank text. Each dict
            has the keys ``"page"`` (the 1-based paragraph index as a string;
            paragraphs without text still consume an index) and ``"context"``
            (the paragraph's stripped text fragments joined by single spaces).
        """
        # The context manager closes the archive even if parsing raises;
        # the original left the handle open.
        with zipfile.ZipFile(filename) as f:
            context = get_context(f)
            # Archive member name of the main document XML.
            document = context.get("officeDocument")
            # Relationships associated with the main document, if any.
            content_rels = (context.get("content_path2rels") or {}).get(document)

            # Read and parse the main document.
            xml_content = f.read(document)
            tree = etree.fromstring(xml_content)

            section = []
            for index, paragraph in enumerate(cls._iterparagraphs(tree)):
                # _itertext is handed the open archive, so it has to run
                # inside the ``with`` block.
                stripped = (
                    text.strip()
                    for text in cls._itertext(paragraph, f, content_rels)
                )
                content = [text for text in stripped if text]
                if content:
                    section.append({
                        "page": str(index + 1),
                        "context": " ".join(content)
                    })
        return section

示例#2

显示文件

文件： main.py 项目： royn5618/docx2python

def docx2python(docx_filename: str,
                image_folder: Optional[str] = None,
                html: bool = False,
                extract_image: bool = True) -> DocxContent:
    """Unzip a docx file and extract contents.

    There's a bit of ugly try/except in the ``file_text`` helper.

    One file in 5300 had the headers and footers mislabeled in
    ``word/_rels.document.xml.rels``. Instead of ``header.xml``, this had the
    header identified as ``word/header.xml``. After trying with
    ``content_dir/file``, try again with just ``file``.

    Args:
        docx_filename: Path of the docx archive to open.
        image_folder: Forwarded to ``pull_image_files`` when ``extract_image``
            is true.
        html: Stored in the context as ``"do_html"`` before any text is
            extracted.
        extract_image: When false, ``pull_image_files`` is not called and the
            result's ``images`` is ``None``.

    Returns:
        A ``DocxContent`` built from the header, body, footer, footnote and
        endnote text, the pulled images, and ``context["docProp2text"]``.
    """
    # The context manager guarantees the archive is closed even when reading
    # or parsing raises; a bare ``zipf.close()`` at the end would be skipped.
    with zipfile.ZipFile(docx_filename) as zipf:
        context = get_context(zipf)
        context["do_html"] = html

        def file_text(filename_):
            """Return the text of one archive member, updating rId2Target."""
            context["rId2Target"] = {
                x["Id"]: x["Target"]
                for x in context["content_path2rels"][filename_]
            }

            try:
                unzipped = zipf.read(filename_)
            except KeyError:
                # content dir specified twice
                unzipped = zipf.read('/'.join(Path(filename_).parts[1:]))
            return get_text(unzipped, context)

        def files_text(filenames):
            """Return the concatenated text of several archive members."""
            return [x for filename in filenames for x in file_text(filename)]

        # Extraction order matters: file_text overwrites context["rId2Target"]
        # on every call, so keep header -> body -> footer -> notes -> images.
        header = files_text(context["headers"])
        body = file_text(context["officeDocument"])
        footer = files_text(context["footers"])
        footnotes = files_text(context["footnotes"])
        endnotes = files_text(context["endnotes"])

        if extract_image:
            images = pull_image_files(zipf, context, image_folder)
        else:
            images = None

    return DocxContent(
        header=header,
        body=body,
        footer=footer,
        footnotes=footnotes,
        endnotes=endnotes,
        images=images,
        properties=context["docProp2text"],
    )

示例#3

显示文件

文件： test_docx_context.py 项目： royn5618/docx2python

 def test_no_image_files(self) -> None:
     """Pass silently when no image files.

     Runs ``pull_image_files`` on ``resources/basic.docx`` and expects the
     output directory to be empty afterwards.
     """
     try:
         # Close the archive as soon as extraction is done.
         with zipfile.ZipFile("resources/basic.docx") as zipf:
             context = get_context(zipf)
             pull_image_files(zipf, context, "delete_this/path/to/images")
         assert os.listdir("delete_this/path/to/images") == []
     finally:
         # Clean up even when the assertion fails, so a failed run does not
         # leave files behind; ignore_errors covers the case where the
         # directory was never created.
         shutil.rmtree("delete_this", ignore_errors=True)

示例#4

显示文件

文件： test_docx_context.py 项目： royn5618/docx2python

 def test_pull_image_files(self) -> None:
     """Copy image files to output path.

     Runs ``pull_image_files`` on ``resources/example.docx`` and expects
     exactly ``image1.png`` and ``image2.jpg`` in the output directory.
     """
     try:
         # Close the archive as soon as extraction is done.
         with zipfile.ZipFile("resources/example.docx") as zipf:
             context = get_context(zipf)
             pull_image_files(zipf, context, "delete_this/path/to/images")
         # os.listdir returns entries in arbitrary order, so sort before
         # comparing against the expected (sorted) names.
         assert sorted(os.listdir("delete_this/path/to/images")) == [
             "image1.png", "image2.jpg"
         ]
     finally:
         # Clean up even when the assertion fails, so a failed run does not
         # leave files behind; ignore_errors covers the case where the
         # directory was never created.
         shutil.rmtree("delete_this", ignore_errors=True)

示例#5

显示文件

文件： test_docx_context.py 项目： royn5618/docx2python

 def test_lists(self) -> None:
     """Pass silently when no numbered or bulleted lists.

     The context built from ``resources/basic.docx`` is expected to contain
     neither the ``numId2numFmts`` nor the ``numId2count`` key.
     """
     # The context manager closes the archive; the original leaked the handle.
     with zipfile.ZipFile("resources/basic.docx") as zipf:
         context = get_context(zipf)
         assert "numId2numFmts" not in context
         assert "numId2count" not in context

示例#6

显示文件

文件： test_docx_context.py 项目： royn5618/docx2python

def docx_context() -> Dict[str, Any]:
    """Return the result of running get_context on ``resources/example.docx``."""
    # The context manager closes the archive once the context is built.
    # NOTE(review): assumes get_context reads everything it needs before
    # returning, as its other callers in this file suggest - confirm.
    with zipfile.ZipFile("resources/example.docx") as zipf:
        return get_context(zipf)