def main_document_is_found_using_package_relationships():
    # The main part is stored under a non-default name ("word/document2.xml")
    # and is referenced only by the officeDocument relationship in _rels/.rels.
    main_part_xml = textwrap.dedent("""\
        <?xml version="1.0" encoding="utf-8" ?>
        <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
            <w:body>
                <w:p>
                    <w:r>
                        <w:t>Hello.</w:t>
                    </w:r>
                </w:p>
            </w:body>
        </w:document>
    """)
    package_rels_xml = textwrap.dedent("""\
        <?xml version="1.0" encoding="utf-8"?>
        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
            <Relationship Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="/word/document2.xml" Id="rId1"/>
        </Relationships>
    """)
    fileobj = _create_zip({
        "word/document2.xml": main_part_xml,
        "_rels/.rels": package_rels_xml,
    })

    result = docx.read(fileobj=fileobj)

    hello_run = documents.run([documents.text("Hello.")])
    expected_document = documents.document([documents.paragraph([hello_run])])
    assert_equal(expected_document, result.value)
def style_mapping_for_underline_runs_does_not_close_parent_elements():
    # A run that is both bold and underlined, with "u => em" mapped, is
    # expected to render the <em> nested inside the <strong>.
    bold_underlined_run = documents.run(
        children=[documents.text("Hello")],
        is_underline=True,
        is_bold=True,
    )
    underline_mapping = _style_mapping("u => em")

    result = convert_document_element_to_html(
        bold_underlined_run,
        style_map=[underline_mapping],
    )

    assert_equal("<strong><em>Hello</em></strong>", result.value)
def can_read_text_within_document(self):
    # Reading a document element that wraps the text "Hello!" should give a
    # document containing one paragraph with one run holding that text.
    element = _document_element_with_text("Hello!")

    expected_run = documents.run([documents.Text("Hello!")])
    expected_document = documents.document([
        documents.paragraph([expected_run]),
    ])

    assert_equal(expected_document, _read_and_get_document_xml_element(element))
def hyperlink_is_ignored_if_it_does_not_have_a_relationship_id_nor_anchor(self):
    # A w:hyperlink with no attributes is expected to be read as just its
    # children: here, a list containing the single empty run.
    child_run = xml_element("w:r")
    hyperlink_element = xml_element("w:hyperlink", {}, [child_run])

    expected = [documents.run([])]
    actual = _read_and_get_document_xml_element(hyperlink_element)

    assert_equal(expected, actual)
def main_document_is_found_using_package_relationships():
    # Build a zip whose main document sits at "word/document2.xml" and is
    # pointed to by the package-level relationships file, then read it.
    zip_entries = {
        "word/document2.xml": textwrap.dedent("""\
            <?xml version="1.0" encoding="utf-8" ?>
            <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                <w:body>
                    <w:p>
                        <w:r>
                            <w:t>Hello.</w:t>
                        </w:r>
                    </w:p>
                </w:body>
            </w:document>
        """),
        "_rels/.rels": textwrap.dedent("""\
            <?xml version="1.0" encoding="utf-8"?>
            <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                <Relationship Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="/word/document2.xml" Id="rId1"/>
            </Relationships>
        """),
    }
    fileobj = _create_zip(zip_entries)

    result = docx.read(fileobj=fileobj)

    expected_document = documents.document([
        documents.paragraph([
            documents.run([
                documents.text("Hello."),
            ]),
        ]),
    ])
    assert_equal(expected_document, result.value)
def hyperlink_is_read_as_internal_hyperlink_if_it_has_an_anchor_attribute(self):
    # A w:hyperlink carrying only w:anchor="start" should be read as a
    # hyperlink with that anchor, keeping the child run.
    child_run = xml_element("w:r")
    hyperlink_element = xml_element(
        "w:hyperlink",
        {"w:anchor": "start"},
        [child_run],
    )

    expected = documents.hyperlink(anchor="start", children=[documents.run([])])
    actual = _read_and_get_document_xml_element(hyperlink_element)

    assert_equal(expected, actual)
def comment_references_are_linked_to_comment_after_main_body():
    # With "comment-reference => sup" mapped, the reference becomes a link in
    # the body and the comment is emitted in a <dl> after it, both using the
    # supplied id prefix.
    reference = documents.comment_reference("4")
    comment = documents.comment(
        comment_id="4",
        body=[_paragraph_with_text("Who's there?")],
        author_name="The Piemaker",
        author_initials="TP",
    )
    body_paragraph = documents.paragraph([
        _run_with_text("Knock knock"),
        documents.run([reference]),
    ])
    document = documents.document([body_paragraph], comments=[comment])

    result = convert_document_element_to_html(
        document,
        id_prefix="doc-42-",
        style_map=[_style_mapping("comment-reference => sup")],
    )

    expected_body_html = '<p>Knock knock<sup><a href="#doc-42-comment-4" id="doc-42-comment-ref-4">[TP1]</a></sup></p>'
    expected_comments_html = '<dl><dt id="doc-42-comment-4">Comment [TP1]</dt><dd><p>Who\'s there? <a href="#doc-42-comment-ref-4">↑</a></p></dd></dl>'
    expected_html = expected_body_html + expected_comments_html
    assert_equal(expected_html, result.value)
def subscript_runs_are_wrapped_in_sub_tags():
    # A run whose vertical alignment is subscript should render inside <sub>.
    subscript_run = documents.run(
        children=[documents.text("Hello")],
        vertical_alignment=documents.VerticalAlignment.subscript,
    )

    result = convert_document_element_to_html(subscript_run)

    assert_equal("<sub>Hello</sub>", result.value)
def comment_references_are_linked_to_comment_after_main_body():
    # The comment reference in the body and the comment listed after the body
    # are expected to link to each other via ids built from the id prefix.
    reference = documents.comment_reference("4")
    comment = documents.comment(
        comment_id="4",
        body=[_paragraph_with_text("Who's there?")],
        author_name="The Piemaker",
        author_initials="TP",
    )
    knock_run = _run_with_text("Knock knock")
    reference_run = documents.run([reference])
    document = documents.document(
        [documents.paragraph([knock_run, reference_run])],
        comments=[comment],
    )
    reference_mapping = _style_mapping("comment-reference => sup")

    result = convert_document_element_to_html(
        document,
        id_prefix="doc-42-",
        style_map=[reference_mapping],
    )

    expected_html = "".join([
        '<p>Knock knock<sup><a href="#doc-42-comment-4" id="doc-42-comment-ref-4">[TP1]</a></sup></p>',
        '<dl><dt id="doc-42-comment-4">Comment [TP1]</dt><dd><p>Who\'s there? <a href="#doc-42-comment-ref-4">↑</a></p></dd></dl>',
    ])
    assert_equal(expected_html, result.value)
def hyperlink_is_read_if_it_has_an_anchor_attribute(self):
    # Only w:anchor is set on the hyperlink element; the result should be a
    # hyperlink with anchor "start" wrapping the empty run.
    run_element = xml_element("w:r")
    attributes = {"w:anchor": "start"}
    element = xml_element("w:hyperlink", attributes, [run_element])

    expected_hyperlink = documents.hyperlink(
        anchor="start",
        children=[documents.run([])],
    )

    assert_equal(expected_hyperlink, _read_and_get_document_xml_element(element))
def hyperlink_is_ignored_if_it_does_not_have_a_relationship_id_nor_anchor(self):
    # With neither r:id nor w:anchor, the hyperlink wrapper is dropped and the
    # child run is what the reader is expected to return.
    child_run = xml_element("w:r")
    hyperlink_element = xml_element("w:hyperlink", {}, [child_run])

    expected = documents.run([])
    actual = _read_and_get_document_xml_element(hyperlink_element)

    assert_equal(expected, actual)
def hyperlink_is_read_if_it_has_a_relationship_id(self):
    # The hyperlink's r:id ("r42") is looked up in the relationships, whose
    # target should become the href of the resulting hyperlink.
    example_relationship = Relationship(target="http://example.com")
    relationships = Relationships({"r42": example_relationship})
    child_run = xml_element("w:r")
    hyperlink_element = xml_element("w:hyperlink", {"r:id": "r42"}, [child_run])

    expected = documents.hyperlink(
        href="http://example.com",
        children=[documents.run([])],
    )
    actual = _read_and_get_document_xml_element(
        hyperlink_element,
        relationships=relationships,
    )

    assert_equal(expected, actual)
def style_mapping_for_underline_runs_does_not_close_parent_elements():
    # Bold and underline are both set; mapping underline to <em> should leave
    # the enclosing <strong> open around it.
    hello_text = documents.text("Hello")
    run = documents.run(children=[hello_text], is_underline=True, is_bold=True)
    style_map = [_style_mapping("u => em")]

    result = convert_document_element_to_html(run, style_map=style_map)

    assert_equal("<strong><em>Hello</em></strong>", result.value)
def strikethrough_runs_can_be_configured_with_style_mapping():
    # The "strike => del" mapping should make a strikethrough run render
    # inside <del>.
    struck_run = documents.run(
        children=[documents.text("Hello")],
        is_strikethrough=True,
    )
    style_map = [_style_mapping("strike => del")]

    result = convert_document_element_to_html(struck_run, style_map=style_map)

    assert_equal("<del>Hello</del>", result.value)
def small_caps_runs_can_be_mapped_using_style_mapping():
    # The "small-caps => span" mapping should wrap a small-caps run in <span>.
    small_caps_run = documents.run(
        children=[documents.text("Hello")],
        is_small_caps=True,
    )
    style_map = [_style_mapping("small-caps => span")]

    result = convert_document_element_to_html(small_caps_run, style_map=style_map)

    assert_equal("<span>Hello</span>", result.value)
def runs_are_converted_by_satisfying_matching_paths():
    # A run with style id "TipsRun" matches "r.TipsRun" and should be rendered
    # through the mapped path span.tip.
    tip_run = documents.run(
        style_id="TipsRun",
        children=[documents.Text("Tip")],
    )
    tips_mapping = _style_mapping("r.TipsRun => span.tip")

    result = convert_document_element_to_html(tip_run, style_map=[tips_mapping])

    assert_equal('<span class="tip">Tip</span>', result.value)
def runs_are_converted_by_satisfying_matching_paths():
    # The style mapping is parsed with style_reader.read_style; a run whose
    # style id is "TipsRun" should come out as <span class="tip">.
    tip_run = documents.run(
        style_id="TipsRun",
        children=[documents.Text("Tip")],
    )
    tips_style = style_reader.read_style("r.TipsRun => span.tip")

    result = convert_document_element_to_html(tip_run, style_map=[tips_style])

    assert_equal('<span class="tip">Tip</span>', result.value)
def underline_runs_can_be_mapped_using_style_mapping():
    # The "u => em" mapping should render an underlined run inside <em>.
    underlined_run = documents.run(
        children=[documents.text("Hello")],
        is_underline=True,
    )
    style_map = [_style_mapping("u => em")]

    result = convert_document_element_to_html(underlined_run, style_map=style_map)

    assert_equal("<em>Hello</em>", result.value)
def hyperlink_is_read_if_it_has_a_relationship_id(self):
    # The r:id on the hyperlink element refers to relationship "r42", whose
    # target is the URL expected on the resulting hyperlink.
    target_relationship = Relationship(target="http://example.com")
    relationships = Relationships({"r42": target_relationship})
    child_run = xml_element("w:r")
    hyperlink_element = xml_element("w:hyperlink", {"r:id": "r42"}, [child_run])

    expected = documents.hyperlink("http://example.com", [documents.run([])])
    actual = _read_and_get_document_xml_element(
        hyperlink_element,
        relationships=relationships,
    )

    assert_equal(expected, actual)
def can_read_document_with_single_paragraph_with_single_run_of_text(self):
    # Reads the single-paragraph.docx fixture and compares the result with a
    # document of one paragraph, one run and one text node.
    fixture_path = test_path("single-paragraph.docx")
    with open(fixture_path, "rb") as fileobj:
        result = docx.read(fileobj=fileobj)

        expected_run = documents.run([documents.text("Walking on imported air")])
        expected_document = documents.document([
            documents.paragraph([expected_run]),
        ])
        assert_equal(expected_document, result.value)
def existing_fragment_is_replaced_when_anchor_is_set_on_external_link(self):
    # The relationship target already ends in "#previous"; with
    # w:anchor="fragment" the expected href ends in "#fragment" instead.
    external_relationship = _hyperlink_relationship(
        "r42",
        "http://example.com/#previous",
    )
    relationships = Relationships([external_relationship])
    child_run = xml_element("w:r")
    hyperlink_element = xml_element(
        "w:hyperlink",
        {"r:id": "r42", "w:anchor": "fragment"},
        [child_run],
    )

    expected = documents.hyperlink(
        href="http://example.com/#fragment",
        children=[documents.run([])],
    )
    actual = _read_and_get_document_xml_element(
        hyperlink_element,
        relationships=relationships,
    )

    assert_equal(expected, actual)
def children_of_w_smart_tag_are_converted_normally(self):
    # A run nested inside w:smartTag should end up as a direct child of the
    # paragraph, as if the wrapper were not there.
    run_element = xml_element("w:r")
    smart_tag_element = xml_element("w:smartTag", {}, [run_element])
    paragraph_element = xml_element("w:p", {}, [smart_tag_element])

    expected_paragraph = documents.paragraph([documents.run([])])

    assert_equal(
        expected_paragraph,
        _read_and_get_document_xml_element(paragraph_element),
    )
def _assert_children_are_converted_normally(tag_name):
    # Wraps a run in an element named `tag_name` inside a paragraph and
    # asserts the run is read as a direct child of that paragraph.
    run_element = xml_element("w:r")
    wrapper_element = xml_element(tag_name, {}, [run_element])
    paragraph_element = xml_element("w:p", {}, [wrapper_element])

    expected_paragraph = documents.paragraph([documents.run([])])

    assert_equal(
        expected_paragraph,
        _read_and_get_document_xml_element(paragraph_element),
    )
def hyperlink_is_read_as_external_hyperlink_if_it_has_a_relationship_id_and_an_anchor(self):
    # Both r:id and w:anchor are set: the expected href is the relationship
    # target with the anchor appended as a fragment.
    external_relationship = _hyperlink_relationship("r42", "http://example.com/")
    relationships = Relationships([external_relationship])
    child_run = xml_element("w:r")
    hyperlink_attributes = {"r:id": "r42", "w:anchor": "fragment"}
    hyperlink_element = xml_element("w:hyperlink", hyperlink_attributes, [child_run])

    expected = documents.hyperlink(
        href="http://example.com/#fragment",
        children=[documents.run([])],
    )
    actual = _read_and_get_document_xml_element(
        hyperlink_element,
        relationships=relationships,
    )

    assert_equal(expected, actual)
def can_read_document_with_single_paragraph_with_single_run_of_text(self):
    # The single-paragraph.docx fixture should read as one paragraph holding
    # one run with the text "Walking on imported air".
    with open(test_path("single-paragraph.docx"), "rb") as docx_file:
        result = docx.read(fileobj=docx_file)

        expected_text = documents.text("Walking on imported air")
        expected_paragraph = documents.paragraph([documents.run([expected_text])])
        expected_document = documents.document([expected_paragraph])
        assert_equal(expected_document, result.value)
def footnotes_are_included_after_the_main_body():
    # The footnote reference becomes a superscript link in the body, and the
    # footnote itself is listed in an <ol> after it, linking back.
    footnote_reference = documents.note_reference("footnote", "4")
    body_paragraph = documents.paragraph([
        _run_with_text("Knock knock"),
        documents.run([footnote_reference]),
    ])
    footnote = documents.note("footnote", "4", [_paragraph_with_text("Who's there?")])
    document = documents.document(
        [body_paragraph],
        notes=documents.notes([footnote]),
    )

    result = convert_document_element_to_html(document, id_prefix="doc-42")

    expected_body_html = '<p>Knock knock<sup><a href="#doc-42-footnote-4" id="doc-42-footnote-ref-4">[1]</a></sup></p>'
    expected_notes_html = '<ol><li id="doc-42-footnote-4"><p>Who\'s there? <a href="#doc-42-footnote-ref-4">↑</a></p></li></ol>'
    expected_html = expected_body_html + expected_notes_html
    assert_equal(expected_html, result.value)
def comments_are_ignored_by_default():
    # No style mapping is supplied for comment references, so neither the
    # reference nor the comment is expected in the output.
    reference = documents.comment_reference("4")
    comment = documents.comment(
        comment_id="4",
        body=[_paragraph_with_text("Who's there?")],
    )
    body_paragraph = documents.paragraph([
        _run_with_text("Knock knock"),
        documents.run([reference]),
    ])
    document = documents.document([body_paragraph], comments=[comment])

    result = convert_document_element_to_html(document, id_prefix="doc-42-")

    assert_equal('<p>Knock knock</p>', result.value)
def footnotes_are_included_after_the_main_body():
    # Converts a document with one footnote reference and one footnote, and
    # checks the body link and the trailing <ol> entry point at each other.
    footnote_reference = documents.note_reference("footnote", "4")
    knock_run = _run_with_text("Knock knock")
    reference_run = documents.run([footnote_reference])
    footnote_body = [_paragraph_with_text("Who's there?")]
    notes = documents.notes([documents.note("footnote", "4", footnote_body)])
    document = documents.document(
        [documents.paragraph([knock_run, reference_run])],
        notes=notes,
    )

    result = convert_document_element_to_html(document, id_prefix="doc-42")

    expected_html = "".join([
        '<p>Knock knock<sup><a href="#doc-42-footnote-4" id="doc-42-footnote-ref-4">[1]</a></sup></p>',
        '<ol><li id="doc-42-footnote-4"><p>Who\'s there? <a href="#doc-42-footnote-ref-4">↑</a></p></li></ol>',
    ])
    assert_equal(expected_html, result.value)
def footnotes_are_included_after_the_main_body():
    # The uniquifier is fixed at 42 so the generated footnote ids in the
    # expected HTML are predictable.
    footnote_reference = documents.footnote_reference("4")
    body_paragraph = documents.paragraph([
        _run_with_text("Knock knock"),
        documents.run([footnote_reference]),
    ])
    footnote = documents.Footnote("4", [_paragraph_with_text("Who's there?")])
    document = documents.document(
        [body_paragraph],
        footnotes=documents.Footnotes({"4": footnote}),
    )

    result = convert_document_element_to_html(
        document,
        generate_uniquifier=lambda: 42,
    )

    expected_body_html = '<p>Knock knock<sup><a href="#footnote-42-4" id="footnote-ref-42-4">[1]</a></sup></p>'
    expected_footnotes_html = '<ol><li id="footnote-42-4"><p>Who\'s there? <a href="#footnote-ref-42-4">↑</a></p></li></ol>'
    expected_html = expected_body_html + expected_footnotes_html
    assert_equal(expected_html, result.value)
def strikethrough_runs_are_wrapped_in_s_elements_by_default():
    # With no style map, a strikethrough run should render inside <s>.
    struck_run = documents.run(
        children=[documents.text("Hello")],
        is_strikethrough=True,
    )

    result = convert_document_element_to_html(struck_run)

    assert_equal("<s>Hello</s>", result.value)
def italic_runs_can_be_configured_with_style_mapping():
    # The "i => strong" mapping should render an italic run inside <strong>.
    italic_run = documents.run(
        children=[documents.text("Hello")],
        is_italic=True,
    )
    italic_mapping = _style_mapping("i => strong")

    result = convert_document_element_to_html(
        italic_run,
        style_map=[italic_mapping],
    )

    assert_equal("<strong>Hello</strong>", result.value)
def run_is_transformed(self):
    # Applying a run transform directly to a run should return whatever the
    # wrapped function returns: here, always a tab.
    def replace_with_tab(_):
        return documents.tab()

    run = documents.run(children=[])
    transform = transforms.run(replace_with_tab)

    result = transform(run)

    assert_equal(documents.tab(), result)
def unrecognised_children_are_ignored():
    # The run contains a text element and an unknown "w:huh" element; only
    # the text is expected in the value that is read.
    text_child = _text_element("Hello!")
    unknown_child = xml_element("w:huh", {}, [])
    run_element = xml_element("w:r", {}, [text_child, unknown_child])

    expected_run = documents.run([documents.Text("Hello!")])
    actual = _read_document_xml_element(run_element).value

    assert_equal(expected_run, actual)
def small_caps_runs_are_ignored_by_default():
    # With no style map, small caps adds no markup: only the text is output.
    small_caps_run = documents.run(
        children=[documents.text("Hello")],
        is_small_caps=True,
    )

    result = convert_document_element_to_html(small_caps_run)

    assert_equal("Hello", result.value)
def text_nodes_are_ignored_when_reading_children():
    # A raw XML text node directly under w:r should not produce any child in
    # the run that is read.
    stray_text = xml_text("[text]")
    run_element = xml_element("w:r", {}, [stray_text])

    expected_run = documents.run([])
    actual = _read_and_get_document_xml_element(run_element)

    assert_equal(expected_run, actual)
def _run_with_text(text):
    # Builds a run whose only child is a text node containing `text`.
    text_node = documents.text(text)
    return documents.run(children=[text_node])
def italic_runs_can_be_configured_with_style_mapping():
    # An italic run converted with "i => strong" in the style map should be
    # wrapped in <strong>.
    hello_text = documents.text("Hello")
    italic_run = documents.run(children=[hello_text], is_italic=True)
    style_map = [_style_mapping("i => strong")]

    result = convert_document_element_to_html(italic_run, style_map=style_map)

    assert_equal("<strong>Hello</strong>", result.value)
def filters_descendants_to_type(self):
    # The paragraph has a tab and a run as children; asking for documents.Run
    # descendants should return only the run.
    tab = documents.tab()
    run = documents.run(children=[])
    paragraph = documents.paragraph(children=[tab, run])

    runs_found = get_descendants_of_type(paragraph, documents.Run)

    assert_equal([run], runs_found)
def includes_indirect_descendants(self):
    # paragraph -> run -> text: both levels should be returned, with the
    # grandchild listed before its parent.
    text_node = documents.text("grandchild")
    run = documents.run(children=[text_node])
    paragraph = documents.paragraph(children=[run])

    descendants = get_descendants(paragraph)

    assert_equal([text_node, run], descendants)
def unrecognised_children_are_ignored(self):
    # An unknown "w:huh" child sits next to the text element; the value read
    # is expected to contain the text only.
    children = [
        _text_element("Hello!"),
        xml_element("w:huh", {}, []),
    ]
    run_element = xml_element("w:r", {}, children)

    expected_run = documents.run([documents.Text("Hello!")])
    read_result = _read_document_xml_element(run_element)

    assert_equal(expected_run, read_result.value)
def underline_runs_can_be_wrapped_in_tags():
    # Passing convert_underline=mammoth.underline.element("u") should make an
    # underlined run render inside <u>.
    underlined_run = documents.run(
        children=[documents.text("Hello")],
        is_underline=True,
    )
    underline_as_u = mammoth.underline.element("u")

    result = convert_document_element_to_html(
        underlined_run,
        convert_underline=underline_as_u,
    )

    assert_equal("<u>Hello</u>", result.value)
def bold_runs_are_wrapped_in_strong_tags_by_default():
    # With no style map, a bold run should render inside <strong>.
    bold_run = documents.run(
        children=[documents.text("Hello")],
        is_bold=True,
    )

    result = convert_document_element_to_html(bold_run)

    assert_equal("<strong>Hello</strong>", result.value)
def underline_runs_are_ignored_by_default():
    # With no style map, underline adds no markup: only the text is output.
    underlined_run = documents.run(
        children=[documents.text("Hello")],
        is_underline=True,
    )

    result = convert_document_element_to_html(underlined_run)

    assert_equal("Hello", result.value)
def bold_runs_can_be_configured_with_style_mapping():
    # The "b => em" mapping should render a bold run inside <em>.
    bold_run = documents.run(
        children=[documents.text("Hello")],
        is_bold=True,
    )
    bold_mapping = _style_mapping("b => em")

    result = convert_document_element_to_html(
        bold_run,
        style_map=[bold_mapping],
    )

    assert_equal("<em>Hello</em>", result.value)
def bold_runs_are_wrapped_in_strong_tags():
    # A bold run converted with default options should come out as <strong>.
    hello_text = documents.text("Hello")
    bold_run = documents.run(children=[hello_text], is_bold=True)

    result = convert_document_element_to_html(bold_run)

    assert_equal("<strong>Hello</strong>", result.value)
def italic_runs_are_wrapped_in_emphasis_tags_by_default():
    # With no style map, an italic run should render inside <em>.
    italic_run = documents.run(
        children=[documents.text("Hello")],
        is_italic=True,
    )

    result = convert_document_element_to_html(italic_run)

    assert_equal("<em>Hello</em>", result.value)
def can_read_text_within_run():
    # A run element wrapping "Hello!" should be read as a run with a single
    # Text child holding that string.
    run_element = _run_element_with_text("Hello!")

    expected_run = documents.run([documents.Text("Hello!")])
    actual = _read_and_get_document_xml_element(run_element)

    assert_equal(expected_run, actual)
def italic_runs_are_wrapped_in_emphasis_tags():
    # An italic run converted with default options should come out as <em>.
    hello_text = documents.text("Hello")
    italic_run = documents.run(children=[hello_text], is_italic=True)

    result = convert_document_element_to_html(italic_run)

    assert_equal("<em>Hello</em>", result.value)
def can_read_text_within_paragraph(self):
    # A paragraph element wrapping "Hello!" should be read as a paragraph
    # containing one run with a single Text child.
    paragraph_element = _paragraph_element_with_text("Hello!")

    expected_run = documents.run([documents.Text("Hello!")])
    expected_paragraph = documents.paragraph([expected_run])
    actual = _read_and_get_document_xml_element(paragraph_element)

    assert_equal(expected_paragraph, actual)