def test_build_tbl__no_seq():
    """Two tables built in turn by one builder get ``NO.SEQ`` 0001 then 0002."""
    formex_builder = FormexBuilder()

    built_elems = []
    for cell_text in (u"text1", u"text2"):
        table = Table()
        table.rows[1].insert_cell(cell_text)
        built_elems.append(formex_builder.build_tbl(table))

    first_elem, second_elem = built_elems
    assert first_elem.attrib['NO.SEQ'] == u"0001"
    assert second_elem.attrib['NO.SEQ'] == u"0002"
def test_build_cell__align(cell_styles, expected):
    """
    Build a single CALS ``<entry>`` and compare its attributes to *expected*.

    The ``colsep`` and ``rowsep`` attributes are left out of the comparison.
    """
    cals_builder = CalsBuilder()

    # -- minimal <row> container and a one-cell table
    row_elem = etree.XML("<row/>")
    paragraph = etree.XML("<p>text</p>")
    cell = Cell([paragraph], x=1, y=1, styles=cell_styles)
    cals_builder.setup_table(Table([cell]))

    # -- build the cell
    cals_builder.build_cell(row_elem, cell)

    # -- check the '<entry>' attributes
    entry_elem = row_elem[0]
    assert entry_elem.tag == u"entry"

    # we don't want to check @colsep/@rowsep here
    actual = {
        name: value
        for name, value in entry_elem.attrib.items()
        if name not in ("colsep", "rowsep")
    }
    assert actual == expected
    assert entry_elem[0] == paragraph
def test_build_cell__with_sep(cell_styles, expected):
    """
    Build the top-left ``<entry>`` of a 2x2 table and compare all its attributes.

    .. note::

       CALS default value for @colsep and @rowsep is "1".
    """
    cals_builder = CalsBuilder()

    # -- minimal <row> container
    row_elem = etree.XML("<row/>")

    # -- 2x2 grid of cells sharing the same content and styles,
    #    listed row by row: (1, 1), (2, 1), (1, 2), (2, 2)
    paragraph = etree.XML("<p>text</p>")
    cells = [
        Cell([paragraph], x=col, y=row, styles=cell_styles)
        for row in (1, 2)
        for col in (1, 2)
    ]
    top_left = cells[0]
    cals_builder.setup_table(Table(cells))

    # -- build the cell
    cals_builder.build_cell(row_elem, top_left)

    # -- check the '<entry>' attributes
    entry_elem = row_elem[0]
    assert entry_elem.tag == u"entry"
    assert entry_elem.attrib == expected
    assert entry_elem[0] == paragraph
def test_build_title__subtitle():
    """
    Build a ``<TITLE>`` from a cell holding three paragraphs.

    The expected tree puts the first paragraph in ``<TI>`` and the two
    others in ``<STI>``.
    """
    paragraphs = [P(u"TITLE"), P(u"Subtitle 1"), P(u"Subtitle 2")]
    table = Table()
    table.rows[1].insert_cell(paragraphs, styles={"align": "center"})

    formex_builder = FormexBuilder()
    tbl_elem = TBL()
    formex_builder.build_title(tbl_elem, table.rows[0])

    # -- check the '<TITLE>' element
    actual_title = tbl_elem[0]

    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    # fmt: off
    expected_title = etree.XML(u"""\
    <TITLE>
      <TI>
        <P>TITLE</P>
      </TI>
      <STI>
        <P>Subtitle 1</P>
        <P>Subtitle 2</P>
      </STI>
    </TITLE>""", parser=blank_stripping_parser)
    # fmt: on

    differences = xmldiff.main.diff_trees(actual_title, expected_title)
    if differences:
        # Dump the tree that was actually built to help diagnose the failure.
        print(etree.tounicode(actual_title, pretty_print=True, with_tail=False), file=sys.stderr)
    assert differences == []
def test_fill_missing_cells():
    """
    Fill the holes of a table's bounding box with "???" and check its drawing.

    The table has four cells on the first row (the last one spanning two
    rows) and a two-column wide cell on the second row, which leaves one
    position empty before ``fill_missing`` is called.
    """
    # fmt: off
    table = Table()
    table.rows[1].insert_cell("one")
    table.rows[1].insert_cell("two")
    table.rows[1].insert_cell("three")
    table.rows[1].insert_cell("four", height=2)
    table.rows[2].insert_cell("un-deux", width=2)
    table.fill_missing(table.bounding_box, "???")
    text = str(table)
    # The expected drawing is whitespace-significant: one grid line per text
    # line. NOTE(review): the padding below assumes 11-character wide columns
    # with centred text -- confirm against the actual ``str(table)`` output.
    assert text == textwrap.dedent("""\
    +-----------+-----------+-----------+-----------+
    |    one    |    two    |   three   |   four    |
    +-----------------------+-----------|           |
    |        un-deux        |    ???    |           |
    +-----------------------+-----------+-----------+""")
def test_build_tbl__empty_cell():
    """A cell inserted with an empty string is built as a ``<CELL>`` holding one ``<IE>``."""
    table = Table()
    table.rows[1].insert_cell(u"")

    tbl_elem = FormexBuilder().build_tbl(table)

    first_cell = tbl_elem.xpath('//CELL')[0]
    children = list(first_cell)
    assert len(children) == 1
    assert children[0].tag == 'IE'
def test_colspan():
    """
    Check the drawing of a table containing a cell that spans two columns.

    Cells are inserted through the column views: "one" then "spanned"
    (``width=2``) in column 1, and "two" in column 2.
    """
    # fmt: off
    table = Table()
    table.cols[1].insert_cell("one")
    table.cols[1].insert_cell("spanned", width=2)
    table.cols[2].insert_cell("two")
    text = str(table)
    # The expected drawing is whitespace-significant: one grid line per text
    # line. NOTE(review): the padding below assumes 11-character wide columns
    # with centred text -- confirm against the actual ``str(table)`` output.
    assert text == textwrap.dedent("""\
    +-----------+-----------+
    |    one    |    two    |
    +-----------------------+
    |        spanned        |
    +-----------------------+""")
def test_rowspan():
    """
    Check the drawing of a table containing a cell that spans two rows.

    Row 1 receives "one" and "spanned" (``height=2``); row 2 receives "two".
    """
    # fmt: off
    table = Table()
    table.rows[1].insert_cell("one")
    table.rows[1].insert_cell("spanned", height=2)
    table.rows[2].insert_cell("two")
    text = str(table)
    # The expected drawing is whitespace-significant: one grid line per text
    # line. NOTE(review): the padding below assumes 11-character wide columns
    # with centred text -- confirm against the actual ``str(table)`` output.
    assert text == textwrap.dedent("""\
    +-----------+-----------+
    |    one    |  spanned  |
    +-----------|           |
    |    two    |           |
    +-----------+-----------+""")
def test_build_cell__body(kwargs, attrib):
    """
    Build one Formex ``<CELL>`` and compare its attributes to *attrib*.

    *kwargs* are forwarded to the ``Cell`` constructor.
    """
    formex_builder = FormexBuilder()

    # -- one-cell table, attached directly to the builder
    paragraph = P(u"text")
    cell = Cell([paragraph], x=1, y=1, **kwargs)
    table = Table([cell])
    formex_builder._table = table

    # -- build the cell inside an empty <ROW>
    row_elem = ROW()
    first_row = next(iter(table.rows))
    formex_builder.build_cell(row_elem, cell, first_row)

    # -- check the '<CELL>' attributes
    cell_elem = row_elem[0]
    assert cell_elem.tag == u"CELL"
    assert cell_elem.attrib == attrib
    assert cell_elem[0] == paragraph
def test_parse_tc(w_tc_content, expected):
    """
    Parse a ``<w:tc/>`` fragment and compare the resulting cell styles to *expected*.

    The comparison is strict: every style of the parsed cell must be expected.
    """
    # NOTE(review): another ``test_parse_tc`` is defined further down in this
    # source; if both live in the same module, the later one shadows this one.
    ooxml_parser = OoxmlParser(BaseBuilder())

    # -- setup a minimal table: one row, positioned on its first column
    state = ooxml_parser._state
    state.table = Table()
    state.row = state.table.rows[1]
    state.next_col()

    # -- parse a <w:tc/>
    ooxml_parser.parse_tc(etree.XML(w_tc_content))

    # -- check the styles
    parsed_cell = state.table[(1, 1)]
    assert expected == parsed_cell.styles
def test_build_tgroup__tgroup_sorting(tgroup_sorting, expected_tags):
    """
    Build a ``<tgroup>`` from header/body/footer rows with a given sorting.

    The tags of the ``tgroup`` children, in document order, must equal
    *expected_tags*.
    """
    # -- create a table with one row of each nature
    table = Table()
    rows = [table.rows[pos] for pos in (1, 2, 3)]
    for row, nature in zip(rows, ("header", "body", "footer")):
        row.nature = nature
    for row, text in zip(rows, ("a", "b", "c")):
        row.insert_cell(text)

    # -- create a builder and build the group
    table_elem = etree.Element("table")
    cals_builder = CalsBuilder(tgroup_sorting=tgroup_sorting)
    cals_builder.setup_table(table)
    cals_builder.build_tgroup(table_elem, table)

    # -- check the tgroup children name and order
    actual_tags = [child.tag for child in table_elem.xpath("tgroup/*")]
    assert actual_tags == expected_tags
def test_parse_tc(w_tc_content, expected):
    """
    Parse a ``<w:tc/>`` fragment and compare the resulting cell styles to *expected*.

    Styles whose name starts with ``x-cell-`` are left out of the comparison.
    """
    ooxml_parser = OoxmlParser(BaseBuilder())

    # -- setup a minimal table: one row, positioned on its first column
    state = ooxml_parser._state
    state.table = Table()
    state.row = state.table.rows[1]
    state.next_col()

    # -- parse a <w:tc/>
    ooxml_parser.parse_tc(etree.XML(w_tc_content))

    # -- check the styles
    parsed_cell = state.table[(1, 1)]

    # Ignore cell styles extensions (like 'x-cell-empty').
    actual = {}
    for name, value in parsed_cell.styles.items():
        if name.startswith("x-cell-"):
            continue
        actual[name] = value
    assert expected == actual
def parse_tbl(self, w_tbl):
    """
    Parse a ``<w:tbl>`` element and store a new table in the parser state.

    See: `Table Properties <http://officeopenxml.com/WPtableProperties.php>`_.

    The table styles are collected from the borders, the shading, the page
    size and the column count of the section; the table style identifier
    is used as the table nature.

    :type  w_tbl: etree._Element
    :param w_tbl: Table element.

    .. versionchanged:: 0.4.0
       The section width and height are now stored in the 'x-sect-size'
       table style (units in 'pt').
    """
    style_id = value_of(w_tbl, "w:tblPr/w:tblStyle/@w:val")

    # -- Borders
    # Table borders (frame) and cell borders (colsep/rowsep) use the "x-cell-" prefix.
    # They are extracted from the style (if possible); the values found in
    # the ``w:tblPr/w:tblBorders`` properties then take precedence when set.
    style_borders = _get_style_borders(self._w_styles, style_id)
    table_borders = _get_table_borders(value_of(w_tbl, 'w:tblPr/w:tblBorders'))
    attrs = style_borders.copy()
    for name, border in table_borders.items():
        if border is not None:
            attrs[name] = border

    # -- Table shading
    attrs.update(Shd(value_of(w_tbl, 'w:tblPr/w:shd')).styles)

    # -- Sections: http://officeopenxml.com/WPsection.php
    # A section's properties are stored in a sectPr element.
    # For all sections except the last section, the sectPr element is stored as
    # a child element of the last paragraph in the section. For the last section,
    # the sectPr is stored as a child element of the body element.
    w_sect_pr = value_of(
        w_tbl,
        'following::w:p/w:pPr/w:sectPr | following::w:sectPr')
    attrs.update(PgSz(value_of(w_sect_pr, 'w:pgSz')).styles)

    # -- w:cols: specifies the set of columns for the section.
    # ``x-sect-cols`` is the section column number.
    # Default value is "1" -- useful for @pgwide
    sect_cols = value_of(w_sect_pr, 'w:cols/@w:num')
    if sect_cols is None and w_sect_pr is None:
        sect_cols = "1"
    elif sect_cols is None:
        # No explicit number: count the column definitions instead.
        col_count = w_sect_pr.xpath('count(w:cols/w:col)', namespaces=NS)
        sect_cols = str(int(col_count)) if col_count else "1"
    attrs['x-sect-cols'] = sect_cols

    # -- The HTML ``class`` attribute is not a regular style.
    #    We use the table ``nature`` instead.
    self._state.table = Table(styles=attrs, nature=style_id)
def test_build_tbl__use_cals():
    """
    Build a Formex ``<TBL>`` with ``use_cals=True`` and no CALS namespace.

    The built tree is compared with an expected tree whose ``<CORPUS>``
    carries CALS-like attributes and ``<colspec>`` children.
    """
    # see: formex-4/samples/jo-compl-2002C_061/C_2002061EN.01000403.xml
    table_styles = {
        "border-top": "solid",
        "border-bottom": "solid",
        "x-sect-orient": "landscape",
        "x-sect-cols": "1",
        "background-color": "blue",
        "width": "180",
    }
    table = Table(styles=table_styles)

    header_row = table.rows[1]
    header_row.nature = "header"
    header_row.insert_cell([P(u"Expert group")], styles={"align": "center"})
    header_row.insert_cell([P(u"First name and surname of the expert")], styles={"align": "center"})

    body_row = table.rows[2]
    body_row.insert_cell([P(u"Control of infectious diseases")])
    body_row.insert_cell([P(u"Michael Angelo BORG")])

    formex_builder = FormexBuilder(use_cals=True, cals_ns=None)
    actual_tree = formex_builder.build_tbl(table)

    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    # fmt: off
    expected_tree = etree.XML(u"""\
    <TBL NO.SEQ="0001" COLS="2" PAGE.SIZE="SINGLE.LANDSCAPE">
      <CORPUS frame="topbot" colsep="0" rowsep="0" orient="land" pgwide="1" bgcolor="blue" width="180.00mm">
        <colspec colname="c1" colnum="1"/>
        <colspec colname="c2" colnum="2"/>
        <ROW TYPE="HEADER">
          <CELL COL="1" align="center">
            <P>Expert group</P>
          </CELL>
          <CELL COL="2" align="center">
            <P>First name and surname of the expert</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>Control of infectious diseases</P>
          </CELL>
          <CELL COL="2">
            <P>Michael Angelo BORG</P>
          </CELL>
        </ROW>
      </CORPUS>
    </TBL>""", parser=blank_stripping_parser)
    # fmt: on

    # Normalise empty texts to None in both trees before comparing them.
    for tree in (actual_tree, expected_tree):
        for elem in tree.xpath("//*"):
            elem.text = elem.text or None

    differences = xmldiff.main.diff_trees(actual_tree, expected_tree)
    if differences:
        # Dump the tree that was actually built to help diagnose the failure.
        print(etree.tounicode(actual_tree, pretty_print=True, with_tail=False), file=sys.stderr)
    assert differences == []
def setup_table(self, styles=None, nature=None):
    """
    Create a new table and make it the current table of the state.

    :param styles: Styles forwarded to the ``Table`` constructor.
    :param nature: Nature forwarded to the ``Table`` constructor.
    :return: The state object (``self._state``) holding the new table.
    """
    self._state.table = Table(styles=styles, nature=nature)
    return self._state
def test_build_tbl__orient(orient, size, expected):
    """
    Build a one-cell ``<TBL>`` for a given section orientation and size.

    The attributes of the built element must equal *expected*.
    """
    section_styles = {'x-sect-orient': orient, 'x-sect-size': size}
    table = Table(styles=section_styles)
    table.rows[1].insert_cell(u"text")

    tbl_elem = FormexBuilder().build_tbl(table)

    assert tbl_elem.attrib == expected
def test_build_tbl__with_title():
    """
    Build a Formex ``<TBL>`` with ``detect_titles=True``.

    The first row holds a single centred cell spanning the three columns;
    the expected tree has it in a ``<TITLE>`` element placed before the
    ``<CORPUS>``.
    """
    # see: formex-4/samples/jo-compl-2002C_280/C_2002280EN.01000101.xml
    table = Table()

    title_row = table.rows[1]
    title_row.insert_cell([P(u"1 euro =")], width=3, styles={"align": "center"})

    header_row = table.rows[2]
    header_row.nature = "header"
    header_row.insert_cell([P()], styles={"x-cell-empty": "true"})
    header_row.insert_cell([P(u"Currency")])
    header_row.insert_cell([P(u"Exchange rate")])

    body_rows = (
        (3, (u"USD", u"US dollar", u"1,0029")),
        (4, (u"JPY", u"Japanese yen", u"121,05")),
    )
    for row_pos, texts in body_rows:
        body_row = table.rows[row_pos]
        for text in texts:
            body_row.insert_cell([P(text)])

    formex_builder = FormexBuilder(detect_titles=True)
    actual_tree = formex_builder.build_tbl(table)

    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    # fmt: off
    expected_tree = etree.XML(u"""\
    <TBL COLS="3" NO.SEQ="0001">
      <TITLE>
        <TI>
          <P>1 euro =</P>
        </TI>
        <STI/>
      </TITLE>
      <CORPUS>
        <ROW TYPE="HEADER">
          <CELL COL="1">
            <IE/>
          </CELL>
          <CELL COL="2">
            <P>Currency</P>
          </CELL>
          <CELL COL="3">
            <P>Exchange rate</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>USD</P>
          </CELL>
          <CELL COL="2">
            <P>US dollar</P>
          </CELL>
          <CELL COL="3">
            <P>1,0029</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>JPY</P>
          </CELL>
          <CELL COL="2">
            <P>Japanese yen</P>
          </CELL>
          <CELL COL="3">
            <P>121,05</P>
          </CELL>
        </ROW>
      </CORPUS>
    </TBL>""", parser=blank_stripping_parser)
    # fmt: on

    # Normalise empty texts to None in both trees before comparing them.
    for tree in (actual_tree, expected_tree):
        for elem in tree.xpath("//*"):
            elem.text = elem.text or None

    differences = xmldiff.main.diff_trees(actual_tree, expected_tree)
    if differences:
        # Dump the tree that was actually built to help diagnose the failure.
        print(etree.tounicode(actual_tree, pretty_print=True, with_tail=False), file=sys.stderr)
    assert differences == []
def test_build_tbl():
    """
    Build a Formex ``<TBL>`` from a six-row, two-column table.

    The table has one header row and a cell spanning three rows; the
    built tree is compared with the expected ``<TBL>``/``<CORPUS>`` tree.
    """
    # see: formex-4/samples/jo-compl-2002C_061/C_2002061EN.01000403.xml
    table = Table()
    table.rows[1].nature = "header"

    # (row position, paragraph text, extra ``insert_cell`` options)
    cell_specs = [
        (1, u"Expert group", {}),
        (1, u"First name and surname of the expert", {}),
        (2, u"Control of infectious diseases", {}),
        (2, u"Michael Angelo BORG", {}),
        (3, u"Information society", {"height": 3}),
        (3, u"Tony HEY", {}),
        (4, u"José L. ENCARNAÇÃO", {}),
        (5, u"Berit SVENDSEN", {}),
        (6, u"Controlled thermonuclear fusion", {}),
        (6, u"Pekka PIRILÄ", {}),
    ]
    for row_pos, text, options in cell_specs:
        table.rows[row_pos].insert_cell([P(text)], **options)

    formex_builder = FormexBuilder()
    actual_tree = formex_builder.build_tbl(table)

    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    # fmt: off
    expected_tree = etree.XML(u"""\
    <TBL COLS="2" NO.SEQ="0001">
      <CORPUS>
        <ROW TYPE="HEADER">
          <CELL COL="1">
            <P>Expert group</P>
          </CELL>
          <CELL COL="2">
            <P>First name and surname of the expert</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>Control of infectious diseases</P>
          </CELL>
          <CELL COL="2">
            <P>Michael Angelo BORG</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1" ROWSPAN="3">
            <P>Information society</P>
          </CELL>
          <CELL COL="2">
            <P>Tony HEY</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="2">
            <P>José L. ENCARNAÇÃO</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="2">
            <P>Berit SVENDSEN</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>Controlled thermonuclear fusion</P>
          </CELL>
          <CELL COL="2">
            <P>Pekka PIRILÄ</P>
          </CELL>
        </ROW>
      </CORPUS>
    </TBL>""", parser=blank_stripping_parser)
    # fmt: on

    # Normalise empty texts to None in both trees before comparing them.
    for tree in (actual_tree, expected_tree):
        for elem in tree.xpath("//*"):
            elem.text = elem.text or None

    differences = xmldiff.main.diff_trees(actual_tree, expected_tree)
    if differences:
        # Dump the tree that was actually built to help diagnose the failure.
        print(etree.tounicode(actual_tree, pretty_print=True, with_tail=False), file=sys.stderr)
    assert differences == []
def test_build_table():
    """
    Build a CALS ``<table>`` from a six-row, two-column table.

    The table has styles (borders, landscape section, background colour,
    width), one header row and a cell spanning three rows. The built tree
    is compared with the expected ``<table>``/``<tgroup>`` tree.
    """
    # see: formex-4/samples/jo-compl-2002C_061/C_2002061EN.01000403.xml
    table = Table(
        styles={
            "border-top": "solid",
            "border-bottom": "solid",
            "x-sect-orient": "landscape",
            "x-sect-cols": "1",
            "background-color": "yellow",
            "width": "247",
        })
    table.rows[1].nature = "header"
    table.rows[1].insert_cell(u"Expert group")
    table.rows[1].insert_cell(u"First name and surname of the expert")
    table.rows[2].insert_cell(u"Control of infectious diseases")
    table.rows[2].insert_cell(u"Michael Angelo BORG")
    table.rows[3].insert_cell(u"Information society", height=3)
    table.rows[3].insert_cell(u"Tony HEY")
    table.rows[4].insert_cell(u"José L. ENCARNAÇÃO")
    table.rows[5].insert_cell(u"Berit SVENDSEN")
    table.rows[6].insert_cell(u"Controlled thermonuclear fusion")
    table.rows[6].insert_cell(u"Pekka PIRILÄ")

    builder = CalsBuilder()
    table_elem = builder.build_table(table)

    xml_parser = etree.XMLParser(remove_blank_text=True)
    # Entry texts are significant: each ``<entry>`` must stay on a single
    # line so that its text is exactly the text inserted in the cell above
    # (a line break inside "José L. ENCARNAÇÃO" would make the trees differ).
    # fmt: off
    expected = etree.XML(u"""\
    <table frame="topbot" colsep="0" rowsep="0" orient="land" pgwide="1" bgcolor="yellow" width="247.00mm">
      <tgroup cols="2">
        <colspec colnum="1" colname="c1"/>
        <colspec colnum="2" colname="c2"/>
        <thead>
          <row>
            <entry>Expert group</entry>
            <entry>First name and surname of the expert</entry>
          </row>
        </thead>
        <tbody>
          <row>
            <entry>Control of infectious diseases</entry>
            <entry>Michael Angelo BORG</entry>
          </row>
          <row>
            <entry morerows="2">Information society</entry>
            <entry>Tony HEY</entry>
          </row>
          <row>
            <entry>José L. ENCARNAÇÃO</entry>
          </row>
          <row>
            <entry>Berit SVENDSEN</entry>
          </row>
          <row>
            <entry>Controlled thermonuclear fusion</entry>
            <entry>Pekka PIRILÄ</entry>
          </row>
        </tbody>
      </tgroup>
    </table>""", parser=xml_parser)
    # fmt: on

    # Normalise empty texts to None in both trees before comparing them.
    for elem in table_elem.xpath("//*"):
        elem.text = elem.text or None
    for elem in expected.xpath("//*"):
        elem.text = elem.text or None

    diff_list = xmldiff.main.diff_trees(table_elem, expected)
    if diff_list:
        # Dump the tree that was actually built to help diagnose the failure.
        print(etree.tounicode(table_elem, pretty_print=True, with_tail=False), file=sys.stderr)
    assert diff_list == []
def test_setup_table():
    """``CalsBuilder.setup_table`` returns a value equal to the table it was given."""
    empty_table = Table()
    cals_builder = CalsBuilder()

    returned = cals_builder.setup_table(empty_table)

    assert returned == empty_table