示例#1
0
 def test_pull_image_files(self) -> None:
     """Copy image files to output path.

     Runs ``docx2python`` on the example document with an image output
     directory, then checks that the directory holds exactly the two
     expected image files.
     """
     image_dir = "delete_this/path/to/images"
     try:
         docx2python("resources/example.docx", image_dir)
         # os.listdir returns names in arbitrary order; sort before comparing
         # so the test does not depend on the filesystem's ordering.
         assert sorted(os.listdir(image_dir)) == ["image1.png", "image2.jpg"]
     finally:
         # Clean up even when the extraction or the assertion fails, so a
         # failed run does not leave the scratch directory behind.
         shutil.rmtree("delete_this", ignore_errors=True)
示例#2
0
 def test_my_checkbox(self) -> None:
     """Checked and unchecked boxes plus several dropdowns extract as expected."""
     expected_paragraphs = [
         "[user unchecked]\u2610[user unchecked]",
         "",
         "[user checked]\u2612[user checked]",
         "",
         "[my unchecked]\u2610[my unchecked]",
         "",
         "[my checked]\u2612[my checked]",
         "",
         "User dropdown (Piihan B)",
         "Piihan B",
         "",
         "My dropdown (no choice)",
         "Choose an item.",
         "",
         "My dropdown (chose A)",
         "my_item_A",
         "",
         "My dropdown (chose B)",
         "my_item_B",
     ]
     docx_path = os.path.join("resources", "check_drop_my.docx")
     result = docx2python(docx_path)
     # The paragraph list sits three levels deep inside the body.
     assert result.body == [[[expected_paragraphs]]]
 def test_bulleted_lists(self) -> None:
     """Full text of ``created-in-pages-bulleted-lists.docx`` matches exactly."""
     expected_text = (
         "\n\nThis is a document for testing docx2python module.\n\n\n\n--\tWhy "
         "did the chicken cross the road?\n\n\t--\tJust because\n\n\t--\tDon't "
         "know\n\n\t--\tTo get to the other side\n\n--\tWhat's the meaning of life, "
         "universe and everything?\n\n\t--\t42\n\n\t--\t0\n\n\t--\t-1\n\n"
     )
     extracted = docx2python("resources/created-in-pages-bulleted-lists.docx")
     assert extracted.text == expected_text
 def test_paragraphs_only(self) -> None:
     """Full text of ``created-in-pages-paragraphs-only.docx`` matches exactly."""
     expected_text = (
         "\n\nThis is a document for testing docx2python module.\n\n\n\nThis "
         "document contains paragraphs.\n\n\n\nThis document does not contain any "
         "bulleted lists.\n\n"
     )
     extracted = docx2python("resources/created-in-pages-paragraphs-only.docx")
     assert extracted.text == expected_text
示例#5
0
 def test_prints(self) -> None:
     """Extract text when ``word/document.xml`` has been renamed.

     The fixture has ``word/document.xml`` renamed to ``word/blah_blah.xml``
     with all references updated; the hyperlink markup must still appear
     in the extracted text.
     """
     docx_path = os.path.join("resources", "renamed_document_xml.docx")
     hyperlink = '<a href="http://www.shayallenhill.com/">my website</a>'
     assert hyperlink in docx2python(docx_path).text
示例#6
0
 def test_user_unchecked_dropdown1(self) -> None:
     """Unchecked-box glyph followed by the dropdown entry is extracted."""
     expected_text = "\u2610 \n\n\n\n\n\nPiihan B"
     docx_path = os.path.join("resources", "unchecked_drop0.docx")
     assert docx2python(docx_path).text == expected_text
示例#7
0
 def test_user_checked_dropdown0(self) -> None:
     """Checked-box glyph followed by the dropdown entry is extracted."""
     expected_text = "\u2612 \n\n\n\n\n\nPIlihan A"
     docx_path = os.path.join("resources", "checked_drop1.docx")
     assert docx2python(docx_path).text == expected_text
示例#8
0
 def test_extraction(self) -> None:
     """The image is listed and its placeholder appears in the extracted text."""
     result = docx2python(os.path.join("resources", "has_pict.docx"))
     image_name = "image1.png"
     placeholder = "----media/image1.png----"
     assert image_name in result.images
     assert placeholder in result.text
示例#9
0
#!/usr/bin/env python3
# _*_ coding: utf-8 _*_
"""Test full functionality of source_old

:author: Shay Hill
:created: 7/5/2019
"""

import os
import shutil
import re

from docx2python.main import docx2python
from docx2python.iterators import iter_at_depth

# Extractions of the example document, computed once when this module is
# imported: OUTPUT uses the default arguments, HTML_OUTPUT passes ``html=True``.
OUTPUT = docx2python("resources/example.docx")
HTML_OUTPUT = docx2python("resources/example.docx", html=True)


class TestFormatting:
    """Nested list output string formatting"""
    def test_header(self) -> None:
        """Header holds its text followed by an image placeholder."""
        pattern = r"Header text----media/image\d+\.\w+----$"
        # Join every item found at depth 4 of the header into one string.
        joined_header = "".join(iter_at_depth(OUTPUT.header, 4))
        assert re.match(pattern, joined_header)

    def test_footer(self) -> None:
        """Footer text in correct location"""
        footer_text = "".join(iter_at_depth(OUTPUT.footer, 4))
        assert re.match(r"Footer text----media/image\d+\.\w+----$",
#!/usr/bin/env python3
# _*_ coding: utf-8 _*_
"""Test features of DocxContent that weren't tested in test_docx2python.

:author: Shay Hill
:created: 7/6/2019
"""

from docx2python.main import docx2python

# Extraction of the example document, computed once when this module is imported.
INST = docx2python("resources/example.docx")


class TestDocument:
    """Checks on the ``document`` attribute of the extraction result."""

    def test_combine_of_header_body_footer(self) -> None:
        """``document`` equals header, body, footer, footnotes and endnotes joined."""
        combined = INST.document
        parts_in_order = (
            INST.header + INST.body + INST.footer + INST.footnotes + INST.endnotes
        )
        assert combined == parts_in_order

    def test_read_only(self) -> None:
        """Truncating a retrieved ``document`` leaves the instance's value intact."""
        truncated = INST.document[:1]
        assert truncated != INST.document
        parts_in_order = (
            INST.header + INST.body + INST.footer + INST.footnotes + INST.endnotes
        )
        assert INST.document == parts_in_order


class TestText:
    def test_function(self) -> None:
        """Return '\n\n'-delimited paragraphs as instance.text. """
示例#11
0
def test_dop_1013a() -> None:
    """Run on a file where ``word/document.xml`` was misidentified as
    ``word/word/document.xml``.

    There are no assertions; the test passes if both extractions run
    without raising.
    """
    # noinspection SpellCheckingInspection
    docx_paths = (
        "resources/example.docx",
        "resources/240-DOP-1013A Lay Down Tubulars.docx",
    )
    for docx_path in docx_paths:
        docx2python(docx_path)
示例#12
0
 def test_prints(self) -> None:
     """The hyperlink is rendered as an anchor tag in the extracted text."""
     docx_path = os.path.join("resources", "hyperlink.docx")
     hyperlink = '<a href="http://www.shayallenhill.com/">my website</a>'
     assert hyperlink in docx2python(docx_path).text
示例#13
0
 def test_get_toc_text(self) -> None:
     """The first 66 characters of the text contain the table-of-contents entry."""
     expected_start = (
         'Contents\n\n\tBeautiful is better than ugly.'
         '\t1</a>\n\n\n\n\n\n\n\nBeautiful i'
     )
     full_text = docx2python("resources/zen_of_python.docx").text
     assert full_text[:66] == expected_start
示例#14
0
 def test_run(self) -> None:
     """The image is listed and its placeholder appears in the extracted text."""
     result = docx2python("resources/has_pict.docx")
     image_name = "image1.png"
     placeholder = "----media/image1.png----"
     assert image_name in result.images
     assert placeholder in result.text