def multiple_paragraphs_are_converted_to_multiple_paragraphs():
    """Convert a two-paragraph document and expect two adjacent <p> elements."""
    paragraphs = [
        documents.paragraph(children=[_run_with_text(word)])
        for word in ("Hello", "there")
    ]
    converted = convert_document_element_to_html(documents.document(paragraphs))
    assert_equal('<p>Hello</p><p>there</p>', converted.value)
def multiple_paragraphs_are_converted_to_multiple_paragraphs():
    """A document holding "Hello" and "there" paragraphs renders as two <p> tags."""
    first = documents.paragraph(children=[_run_with_text("Hello")])
    second = documents.paragraph(children=[_run_with_text("there")])
    document = documents.document([first, second])

    converted = convert_document_element_to_html(document)

    assert_equal('<p>Hello</p><p>there</p>', converted.value)
def bulleted_paragraphs_are_converted_using_matching_styles():
    """An unordered level-0 paragraph mapped to "ul > li:fresh" renders as a list item."""
    children = [_run_with_text("Hello")]
    bullet_level = documents.numbering_level(level_index=0, is_ordered=False)
    paragraph = documents.paragraph(children=children, numbering=bullet_level)
    mappings = [_style_mapping("p:unordered-list(1) => ul > li:fresh")]

    converted = convert_document_element_to_html(paragraph, style_map=mappings)

    assert_equal('<ul><li>Hello</li></ul>', converted.value)
def default_paragraph_style_is_used_if_no_matching_style_is_found():
    """A paragraph with style ID "TipsParagraph" and no style map renders as a plain <p>."""
    tip = documents.paragraph(
        style_id="TipsParagraph",
        children=[_run_with_text("Tip")],
    )
    converted = convert_document_element_to_html(tip)
    assert_equal('<p>Tip</p>', converted.value)
def style_mappings_using_style_names_can_be_used_to_map_paragraphs():
    """A mapping keyed on style-name 'Tips Paragraph' renders the paragraph as p.tip."""
    tip = documents.paragraph(
        style_id="TipsParagraph",
        style_name="Tips Paragraph",
        children=[_run_with_text("Tip")],
    )
    mappings = [_style_mapping("p[style-name='Tips Paragraph'] => p.tip")]

    converted = convert_document_element_to_html(tip, style_map=mappings)

    assert_equal('<p class="tip">Tip</p>', converted.value)
def main_document_is_found_using_package_relationships():
    """Read a zip whose main part is word/document2.xml, referenced from _rels/.rels."""
    # NOTE(review): the line breaks and indentation inside the two XML literals
    # were reconstructed from a flattened source; confirm against the original.
    document_xml = textwrap.dedent("""\
        <?xml version="1.0" encoding="utf-8" ?>
        <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
            <w:body>
                <w:p>
                    <w:r>
                        <w:t>Hello.</w:t>
                    </w:r>
                </w:p>
            </w:body>
        </w:document>
    """)
    package_rels_xml = textwrap.dedent("""\
        <?xml version="1.0" encoding="utf-8"?>
        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
            <Relationship Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="/word/document2.xml" Id="rId1"/>
        </Relationships>
    """)
    archive = _create_zip({
        "word/document2.xml": document_xml,
        "_rels/.rels": package_rels_xml,
    })

    read_result = docx.read(fileobj=archive)

    hello_run = documents.run([documents.text("Hello.")])
    expected = documents.document([documents.paragraph([hello_run])])
    assert_equal(expected, read_result.value)
def comment_references_are_linked_to_comment_after_main_body():
    """With "comment-reference => sup" mapped, the comment is emitted in a <dl> after the body."""
    comment_ref = documents.comment_reference("4")
    piemaker_comment = documents.comment(
        comment_id="4",
        body=[_paragraph_with_text("Who's there?")],
        author_name="The Piemaker",
        author_initials="TP",
    )
    knock_paragraph = documents.paragraph(
        [_run_with_text("Knock knock"), documents.run([comment_ref])]
    )
    document = documents.document([knock_paragraph], comments=[piemaker_comment])

    converted = convert_document_element_to_html(
        document,
        id_prefix="doc-42-",
        style_map=[_style_mapping("comment-reference => sup")],
    )

    body_html = '<p>Knock knock<sup><a href="#doc-42-comment-4" id="doc-42-comment-ref-4">[TP1]</a></sup></p>'
    comments_html = '<dl><dt id="doc-42-comment-4">Comment [TP1]</dt><dd><p>Who\'s there? <a href="#doc-42-comment-ref-4">↑</a></p></dd></dl>'
    assert_equal(body_html + comments_html, converted.value)
def can_read_text_within_document(self):
    """Reading a document element containing "Hello!" yields document > paragraph > run > text."""
    xml = _document_element_with_text("Hello!")
    hello_run = documents.run([documents.Text("Hello!")])
    expected = documents.document([documents.paragraph([hello_run])])
    assert_equal(expected, _read_and_get_document_xml_element(xml))
def style_names_in_style_mappings_are_case_insensitive():
    """A lower-case 'tips paragraph' mapping still matches style name "Tips Paragraph"."""
    tip = documents.paragraph(
        style_id="TipsParagraph",
        style_name="Tips Paragraph",
        children=[_run_with_text("Tip")],
    )
    mappings = [_style_mapping("p[style-name='tips paragraph'] => p.tip")]

    converted = convert_document_element_to_html(tip, style_map=mappings)

    assert_equal('<p class="tip">Tip</p>', converted.value)
def comment_references_are_linked_to_comment_after_main_body():
    """Comment "4" by "TP" is linked from the body and listed in a trailing <dl>."""
    ref = documents.comment_reference("4")
    comment = documents.comment(
        comment_id="4",
        body=[_paragraph_with_text("Who's there?")],
        author_name="The Piemaker",
        author_initials="TP",
    )
    runs = [_run_with_text("Knock knock"), documents.run([ref])]
    document = documents.document(
        [documents.paragraph(runs)],
        comments=[comment],
    )
    mappings = [_style_mapping("comment-reference => sup")]

    outcome = convert_document_element_to_html(
        document,
        id_prefix="doc-42-",
        style_map=mappings,
    )

    expected_parts = [
        '<p>Knock knock<sup><a href="#doc-42-comment-4" id="doc-42-comment-ref-4">[TP1]</a></sup></p>',
        '<dl><dt id="doc-42-comment-4">Comment [TP1]</dt><dd><p>Who\'s there? <a href="#doc-42-comment-ref-4">↑</a></p></dd></dl>',
    ]
    assert_equal("".join(expected_parts), outcome.value)
def warning_is_emitted_if_paragraph_style_is_unrecognised():
    """Converting a paragraph with style name "TipsParagraph" and no style map warns once."""
    tip = documents.paragraph(
        style_name="TipsParagraph",
        children=[_run_with_text("Tip")],
    )
    converted = convert_document_element_to_html(tip)
    expected_messages = [
        results.warning("Unrecognised paragraph style: TipsParagraph"),
    ]
    assert_equal(expected_messages, converted.messages)
def main_document_is_found_using_package_relationships():
    """The officeDocument relationship in _rels/.rels points the reader at word/document2.xml."""
    # NOTE(review): the line breaks and indentation inside the two XML literals
    # were reconstructed from a flattened source; confirm against the original.
    parts = {
        "word/document2.xml": textwrap.dedent("""\
            <?xml version="1.0" encoding="utf-8" ?>
            <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                <w:body>
                    <w:p>
                        <w:r>
                            <w:t>Hello.</w:t>
                        </w:r>
                    </w:p>
                </w:body>
            </w:document>
        """),
        "_rels/.rels": textwrap.dedent("""\
            <?xml version="1.0" encoding="utf-8"?>
            <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                <Relationship Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="/word/document2.xml" Id="rId1"/>
            </Relationships>
        """),
    }
    zipped = _create_zip(parts)

    outcome = docx.read(fileobj=zipped)

    text = documents.text("Hello.")
    paragraph = documents.paragraph([documents.run([text])])
    assert_equal(documents.document([paragraph]), outcome.value)
def bulleted_styles_dont_match_plain_paragraph():
    """A paragraph without numbering is not matched by a "p:unordered-list(1)" style."""
    plain = documents.paragraph(children=[_run_with_text("Hello")])
    list_style = style_reader.read_style("p:unordered-list(1) => ul > li:fresh")

    converted = convert_document_element_to_html(plain, style_map=[list_style])

    assert_equal('<p>Hello</p>', converted.value)
def id_and_body_of_comment_is_read():
    """Reading one w:comment with id "1" and a w:p body yields one comment with both set."""
    body = [xml_element("w:p")]
    read_comments = create_comments_reader(body_xml.reader())
    comments_element = xml_element("w:comments", {}, [
        xml_element("w:comment", {"w:id": "1"}, body),
    ])

    comments = read_comments(comments_element)

    assert_equal(1, len(comments.value))
    only_comment = comments.value[0]
    assert_equal(only_comment.body, [documents.paragraph(children=[])])
    assert_equal("1", comments.value[0].comment_id)
def bulleted_paragraphs_are_converted_using_matching_styles():
    """A style read from "p:unordered-list(1) => ul > li:fresh" turns a bullet into <ul><li>."""
    runs = [_run_with_text("Hello")]
    level = documents.numbering_level(level_index=0, is_ordered=False)
    bullet = documents.paragraph(children=runs, numbering=level)
    list_style = style_reader.read_style("p:unordered-list(1) => ul > li:fresh")

    converted = convert_document_element_to_html(bullet, style_map=[list_style])

    assert_equal("<ul><li>Hello</li></ul>", converted.value)
def paragraphs_are_terminated_with_newlines():
    """Raw text of a paragraph is its children's text followed by two newlines."""
    pieces = [
        documents.Text("Hello "),
        documents.Text("world."),
    ]
    paragraph = documents.paragraph(children=pieces)

    raw_text = extract_raw_text_from_element(paragraph)

    assert_equal("Hello world.\n\n", raw_text)
def can_read_document_with_single_paragraph_with_single_run_of_text(self):
    """Reading single-paragraph.docx yields one paragraph with one run of text."""
    docx_path = test_path("single-paragraph.docx")
    with open(docx_path, "rb") as source:
        outcome = docx.read(fileobj=source)
        text = documents.text("Walking on imported air")
        expected = documents.document([
            documents.paragraph([documents.run([text])]),
        ])
        assert_equal(expected, outcome.value)
def warning_is_emitted_if_paragraph_style_is_unrecognised():
    """The warning for an unmapped paragraph style names both the style name and style ID."""
    heading = documents.paragraph(
        style_id="Heading1",
        style_name="Heading 1",
        children=[_run_with_text("Tip")],
    )
    converted = convert_document_element_to_html(heading)
    expected_warning = results.warning(
        "Unrecognised paragraph style: Heading 1 (Style ID: Heading1)"
    )
    assert_equal([expected_warning], converted.messages)
def id_and_body_of_comment_is_read():
    """A w:comments element with a single w:comment is read into one comment object."""
    body = [xml_element("w:p")]
    reader = create_comments_reader(body_xml.reader())
    comment_element = xml_element("w:comment", {"w:id": "1"}, body)

    comments = reader(xml_element("w:comments", {}, [comment_element]))

    assert_equal(1, len(comments.value))
    assert_equal(comments.value[0].body, [documents.paragraph(children=[])])
    assert_equal("1", comments.value[0].comment_id)
def style_mappings_using_style_names_can_be_used_to_map_paragraphs():
    """Mapping p[style-name='Tips Paragraph'] to p.tip adds class "tip" to the output."""
    runs = [_run_with_text("Tip")]
    paragraph = documents.paragraph(
        style_id="TipsParagraph",
        style_name="Tips Paragraph",
        children=runs,
    )
    tip_mapping = _style_mapping("p[style-name='Tips Paragraph'] => p.tip")

    outcome = convert_document_element_to_html(paragraph, style_map=[tip_mapping])

    assert_equal('<p class="tip">Tip</p>', outcome.value)
def children_are_recursively_converted_to_text():
    """Raw text extraction descends from the document through its paragraph to the text."""
    texts = [documents.text("Hello "), documents.text("world.")]
    paragraph = documents.paragraph(texts, {})
    document = documents.document([paragraph])

    raw_text = extract_raw_text_from_element(document)

    assert_equal("Hello world.\n\n", raw_text)
def style_names_in_style_mappings_are_case_insensitive():
    """Style name "Tips Paragraph" is matched by a mapping written as 'tips paragraph'."""
    runs = [_run_with_text("Tip")]
    paragraph = documents.paragraph(
        style_id="TipsParagraph",
        style_name="Tips Paragraph",
        children=runs,
    )
    lower_case_mapping = _style_mapping("p[style-name='tips paragraph'] => p.tip")

    outcome = convert_document_element_to_html(
        paragraph,
        style_map=[lower_case_mapping],
    )

    assert_equal('<p class="tip">Tip</p>', outcome.value)
def _assert_children_are_converted_normally(tag_name):
    """Assert that a w:r nested inside ``tag_name`` within a w:p is read as a plain run.

    ``tag_name`` is the XML tag used for the element wrapping the run.
    """
    wrapper = xml_element(tag_name, {}, [xml_element("w:r")])
    paragraph_element = xml_element("w:p", {}, [wrapper])
    expected = documents.paragraph([documents.run([])])
    assert_equal(expected, _read_and_get_document_xml_element(paragraph_element))
def default_paragraph_style_is_specified_by_mapping_plain_paragraphs():
    """A bare "p => p.tip" mapping applies to a paragraph with style ID "TipsParagraph"."""
    paragraph = documents.paragraph(
        style_id="TipsParagraph",
        children=[_run_with_text("Tip")],
    )
    mappings = [_style_mapping("p => p.tip")]

    converted = convert_document_element_to_html(paragraph, style_map=mappings)

    assert_equal('<p class="tip">Tip</p>', converted.value)
def children_of_w_smart_tag_are_converted_normally(self):
    """A w:r nested in w:smartTag inside a w:p is read as an ordinary run of the paragraph."""
    smart_tag = xml_element("w:smartTag", {}, [xml_element("w:r")])
    paragraph_element = xml_element("w:p", {}, [smart_tag])
    expected = documents.paragraph([documents.run([])])
    assert_equal(expected, _read_and_get_document_xml_element(paragraph_element))
def default_paragraph_style_is_specified_by_mapping_plain_paragraphs():
    """A style read from "p => p.tip" is applied to a "TipsParagraph" paragraph."""
    runs = [_run_with_text("Tip")]
    paragraph = documents.paragraph(style_id="TipsParagraph", children=runs)
    plain_style = style_reader.read_style("p => p.tip")

    outcome = convert_document_element_to_html(paragraph, style_map=[plain_style])

    assert_equal('<p class="tip">Tip</p>', outcome.value)
def footnotes_of_document_are_read(self):
    """Notes passed to the reader can be looked up on the resulting document."""
    supplied_notes = [
        documents.note("footnote", "4", [documents.paragraph([])]),
    ]
    body_element = xml_element("w:body")
    document_element = xml_element("w:document", {}, [body_element])

    document = _read_and_get_document_xml_element(
        document_element,
        notes=supplied_notes,
    )

    found = document.notes.find_note("footnote", "4")
    assert_equal("4", found.note_id)
    assert isinstance(found.body[0], documents.Paragraph)
def paragraphs_are_converted_by_satisfying_matching_paths():
    """A "p.TipsParagraph => p.tip" style renders the matching paragraph with class "tip"."""
    runs = [_run_with_text("Tip")]
    paragraph = documents.paragraph(style_name="TipsParagraph", children=runs)
    tip_style = style_reader.read_style("p.TipsParagraph => p.tip")

    outcome = convert_document_element_to_html(paragraph, style_map=[tip_style])

    assert_equal('<p class="tip">Tip</p>', outcome.value)
def bulleted_styles_dont_match_plain_paragraph():
    """An unordered-list mapping leaves a paragraph without numbering as a plain <p>."""
    runs = [_run_with_text("Hello")]
    plain = documents.paragraph(children=runs)
    mappings = [_style_mapping("p:unordered-list(1) => ul > li:fresh")]

    outcome = convert_document_element_to_html(plain, style_map=mappings)

    assert_equal('<p>Hello</p>', outcome.value)
def can_read_document_with_single_paragraph_with_single_run_of_text(self):
    """single-paragraph.docx is read as a document with one paragraph, one run, one text."""
    with open(test_path("single-paragraph.docx"), "rb") as docx_file:
        read_result = docx.read(fileobj=docx_file)
        run = documents.run([documents.text("Walking on imported air")])
        paragraph = documents.paragraph([run])
        assert_equal(documents.document([paragraph]), read_result.value)
def footnotes_are_included_after_the_main_body():
    """Footnote "4" is linked from the body and rendered in an <ol> after it."""
    note_ref = documents.note_reference("footnote", "4")
    knock_paragraph = documents.paragraph(
        [_run_with_text("Knock knock"), documents.run([note_ref])]
    )
    footnote = documents.note("footnote", "4", [_paragraph_with_text("Who's there?")])
    document = documents.document(
        [knock_paragraph],
        notes=documents.notes([footnote]),
    )

    converted = convert_document_element_to_html(document, id_prefix="doc-42")

    body_html = '<p>Knock knock<sup><a href="#doc-42-footnote-4" id="doc-42-footnote-ref-4">[1]</a></sup></p>'
    notes_html = '<ol><li id="doc-42-footnote-4"><p>Who\'s there? <a href="#doc-42-footnote-ref-4">↑</a></p></li></ol>'
    assert_equal(body_html + notes_html, converted.value)
def word_table_is_read_as_document_table_element(self):
    """A w:tbl with one row and one cell is read as table > table_row > table_cell."""
    cell_element = xml_element("w:tc", {}, [xml_element("w:p", {}, [])])
    row_element = xml_element("w:tr", {}, [cell_element])
    table_element = xml_element("w:tbl", {}, [row_element])

    table = _read_and_get_document_xml_element(table_element)

    expected_cell = documents.table_cell([documents.paragraph([])])
    expected_table = documents.table([documents.table_row([expected_cell])])
    assert_equal(expected_table, table)
def comments_are_ignored_by_default():
    """Without a style map, neither the comment reference nor the comment is rendered."""
    comment_ref = documents.comment_reference("4")
    comment = documents.comment(
        comment_id="4",
        body=[_paragraph_with_text("Who's there?")],
    )
    runs = [_run_with_text("Knock knock"), documents.run([comment_ref])]
    document = documents.document(
        [documents.paragraph(runs)],
        comments=[comment],
    )

    converted = convert_document_element_to_html(document, id_prefix="doc-42-")

    assert_equal('<p>Knock knock</p>', converted.value)
def gridspan_is_read_as_colspan_for_table_cell(self):
    """A w:gridSpan with w:val "2" in w:tcPr is read as colspan=2 on the table cell."""
    cell_properties = xml_element("w:tcPr", {}, [
        xml_element("w:gridSpan", {"w:val": "2"}),
    ])
    cell_element = xml_element("w:tc", {}, [
        cell_properties,
        xml_element("w:p", {}, []),
    ])
    table_element = xml_element("w:tbl", {}, [
        xml_element("w:tr", {}, [cell_element]),
    ])

    table = _read_and_get_document_xml_element(table_element)

    expected_cell = documents.table_cell([documents.paragraph([])], colspan=2)
    expected_table = documents.table([documents.table_row([expected_cell])])
    assert_equal(expected_table, table)
def word_table_is_read_as_document_table_element(self):
    """Reading w:tbl > w:tr > w:tc > w:p yields a table with one row, cell and paragraph."""
    paragraph_element = xml_element("w:p", {}, [])
    cell_element = xml_element("w:tc", {}, [paragraph_element])
    table_element = xml_element("w:tbl", {}, [
        xml_element("w:tr", {}, [cell_element]),
    ])

    actual = _read_and_get_document_xml_element(table_element)

    cell = documents.table_cell([documents.paragraph([])])
    row = documents.table_row([cell])
    assert_equal(documents.table([row]), actual)
def footnotes_are_included_after_the_main_body():
    """The footnote body appears in an ordered list after the paragraph that references it."""
    reference = documents.note_reference("footnote", "4")
    runs = [_run_with_text("Knock knock"), documents.run([reference])]
    main_body = [documents.paragraph(runs)]
    notes = documents.notes([
        documents.note("footnote", "4", [_paragraph_with_text("Who's there?")]),
    ])
    document = documents.document(main_body, notes=notes)

    outcome = convert_document_element_to_html(document, id_prefix="doc-42")

    expected_parts = [
        '<p>Knock knock<sup><a href="#doc-42-footnote-4" id="doc-42-footnote-ref-4">[1]</a></sup></p>',
        '<ol><li id="doc-42-footnote-4"><p>Who\'s there? <a href="#doc-42-footnote-ref-4">↑</a></p></li></ol>',
    ]
    assert_equal("".join(expected_parts), outcome.value)
def gridspan_is_read_as_colspan_for_table_cell(self):
    """The w:gridSpan value of a cell's w:tcPr becomes the colspan of the table cell."""
    grid_span = xml_element("w:gridSpan", {"w:val": "2"})
    properties = xml_element("w:tcPr", {}, [grid_span])
    paragraph_element = xml_element("w:p", {}, [])
    cell_element = xml_element("w:tc", {}, [properties, paragraph_element])
    row_element = xml_element("w:tr", {}, [cell_element])
    table_element = xml_element("w:tbl", {}, [row_element])

    actual = _read_and_get_document_xml_element(table_element)

    cell = documents.table_cell([documents.paragraph([])], colspan=2)
    row = documents.table_row([cell])
    assert_equal(documents.table([row]), actual)
def footnotes_are_included_after_the_main_body():
    """With the uniquifier fixed at 42, footnote "4" is linked and listed after the body."""
    reference = documents.footnote_reference("4")
    runs = [_run_with_text("Knock knock"), documents.run([reference])]
    main_body = [documents.paragraph(runs)]
    footnotes = documents.Footnotes({
        "4": documents.Footnote("4", [_paragraph_with_text("Who's there?")]),
    })
    document = documents.document(main_body, footnotes=footnotes)

    converted = convert_document_element_to_html(
        document,
        generate_uniquifier=lambda: 42,
    )

    body_html = '<p>Knock knock<sup><a href="#footnote-42-4" id="footnote-ref-42-4">[1]</a></sup></p>'
    footnotes_html = '<ol><li id="footnote-42-4"><p>Who\'s there? <a href="#footnote-ref-42-4">↑</a></p></li></ol>'
    assert_equal(body_html + footnotes_html, converted.value)
def empty_paragraphs_are_ignored():
    """A paragraph whose only run has empty text converts to an empty string."""
    empty_run = _run_with_text("")
    paragraph = documents.paragraph(children=[empty_run])
    converted = convert_document_element_to_html(paragraph)
    assert_equal('', converted.value)
def includes_indirect_descendants(self):
    """get_descendants lists a run's text before the run itself for paragraph > run > text."""
    text = documents.text("grandchild")
    run = documents.run(children=[text])
    paragraph = documents.paragraph(children=[run])

    descendants = get_descendants(paragraph)

    assert_equal([text, run], descendants)
def non_paragraph_elements_are_not_transformed(self):
    """A transform built with transforms.run leaves an empty paragraph unchanged."""
    original = documents.paragraph(children=[])

    def replace_with_tab(_):
        return documents.tab()

    transform = transforms.run(replace_with_tab)
    transformed = transform(original)

    assert_equal(documents.paragraph(children=[]), transformed)
def _paragraph_with_text(text):
    """Return a paragraph whose only child is the run built by ``_run_with_text(text)``."""
    run = _run_with_text(text)
    return documents.paragraph(children=[run])
def empty_paragraphs_are_ignored():
    """Converting a paragraph that contains only an empty-text run produces no HTML."""
    runs = [_run_with_text("")]
    outcome = convert_document_element_to_html(documents.paragraph(children=runs))
    expected_html = ''
    assert_equal(expected_html, outcome.value)
def filters_descendants_to_type(self):
    """get_descendants_of_type with documents.Run returns the run but not the tab."""
    tab_child = documents.tab()
    run_child = documents.run(children=[])
    paragraph = documents.paragraph(children=[tab_child, run_child])

    runs = get_descendants_of_type(paragraph, documents.Run)

    assert_equal([run_child], runs)
def includes_children(self):
    """get_descendants of a paragraph with two text children returns those children in order."""
    first = documents.text("child 1")
    second = documents.text("child 2")
    children = [first, second]
    paragraph = documents.paragraph(children=children)

    descendants = get_descendants(paragraph)

    assert_equal(children, descendants)
def raw_text_of_paragraph_is_terminated_with_newlines():
    """Raw text of a paragraph containing "Hello" is "Hello" followed by two newlines."""
    hello = documents.Text("Hello")
    paragraph = documents.paragraph(children=[hello])
    raw_text = extract_raw_text_from_element(paragraph)
    assert_equal("Hello\n\n", raw_text)
def can_read_text_within_paragraph(self):
    """Reading a paragraph element containing "Hello!" yields paragraph > run > text."""
    xml = _paragraph_element_with_text("Hello!")
    hello_run = documents.run([documents.Text("Hello!")])
    expected = documents.paragraph([hello_run])
    assert_equal(expected, _read_and_get_document_xml_element(xml))
def plain_paragraph_is_converted_to_plain_paragraph():
    """A paragraph with a single "Hello" run converts to '<p>Hello</p>'."""
    hello_run = _run_with_text("Hello")
    paragraph = documents.paragraph(children=[hello_run])
    converted = convert_document_element_to_html(paragraph)
    assert_equal('<p>Hello</p>', converted.value)
def no_warning_if_there_is_no_style_for_plain_paragraphs():
    """Converting a paragraph built without any style arguments produces no messages."""
    plain = documents.paragraph(children=[_run_with_text("Tip")])
    converted = convert_document_element_to_html(plain)
    no_messages = []
    assert_equal(no_messages, converted.messages)
def plain_paragraph_is_converted_to_plain_paragraph():
    """Converting a paragraph built with only a "Hello" run yields a bare <p> element."""
    runs = [_run_with_text("Hello")]
    outcome = convert_document_element_to_html(documents.paragraph(children=runs))
    expected_html = '<p>Hello</p>'
    assert_equal(expected_html, outcome.value)
def returns_nothing_if_element_has_empty_children(self):
    """get_descendants of a paragraph with no children is an empty list."""
    childless = documents.paragraph(children=[])
    descendants = get_descendants(childless)
    assert_equal([], descendants)