def test_no_caption(self):
    """A table without a leading caption reports None and accepts a new one.

    The ``|+ ignore`` line follows the first cell; the assertions expect it
    not to be reported as the caption. Assigning ``caption`` is expected to
    insert a new ``|+`` line directly after the table-start line.
    """
    table = Table('{| class="wikitable"\n|a\n|+ ignore\n|}')
    # Identity checks fail more clearly than assertEqual(..., None).
    self.assertIsNone(table.caption)
    self.assertIsNone(table.caption_attrs)
    table.caption = 'foo'
    self.assertEqual(
        table.string,
        '{| class="wikitable"\n|+foo\n|a\n|+ ignore\n|}',
    )
def test_longer_text_and_only_rstrip(self):
    """Cell text keeps inner and leading newlines; only the end is trimmed."""
    wikitext = (
        '{|\n|multi\nline\ntext. \n\n2nd paragraph. \n|'
        '\n* ulli1\n* ulli2\n* ulli3\n|}'
    )
    first_cell = 'multi\nline\ntext. \n\n2nd paragraph.'
    # The second cell starts with a newline, which must survive.
    second_cell = '\n* ulli1\n* ulli2\n* ulli3'
    self.assertEqual(Table(wikitext).data(), [[first_cell, second_cell]])
def _extract_table(table: wtp.Table) -> Optional[dict]:
    """Convert a parsed wiki table into one header row plus its data rows.

    Every cell is passed through ``_convert_markup`` and stored as a dict
    with the keys ``'text'`` and ``'entities'``.

    Args:
        table: The table to extract.

    Returns:
        ``{'header': [...], 'data': [[...], ...]}``, or ``None`` when the
        table data cannot be read, when any row has fewer than 2 or more
        than 100 columns, or when fewer than 2 data rows remain.

    Raises:
        ParsingTimeoutException: Re-raised unchanged (including subclasses)
            so that a timeout is not mistaken for an unusable table.
    """
    try:
        spanned_rows = table.data(strip=True, span=True)
        cells = table.cells(span=True)
        unspanned_rows = table.data(strip=True, span=False)
    except ParsingTimeoutException:
        raise
    except Exception:
        # Best effort: any other failure means the table is unusable.
        # KeyboardInterrupt derives from BaseException, so it is never
        # caught here and keeps propagating on its own.
        return None

    row_header = []
    row_data = []
    for row_idx, row in enumerate(spanned_rows):
        if not 2 <= len(row) <= 100:
            # ignore tables with only one or more than 100 columns
            # (likely irrelevant or markup error)
            return None

        parsed_cells = []
        for cell in row:
            # NOTE(review): str(cell) turns a None cell into the text
            # 'None' -- confirm whether data(span=True) can yield None here.
            plaintext, entities = _convert_markup(str(cell))
            parsed_cells.append({'text': plaintext, 'entities': entities})

        if _is_header_row(cells, row_idx):
            # A later header row replaces an earlier one.
            row_header = parsed_cells
        elif (row_idx < len(unspanned_rows)
              and len(row) == len(unspanned_rows[row_idx])):
            # only use rows that are not influenced by row-/colspan
            row_data.append(parsed_cells)

    if len(row_data) < 2:
        return None  # ignore tables with less than 2 data rows
    return {'header': row_header, 'data': row_data}
def test_with_caption(self):
    """The caption line is not part of the rows returned by ``data()``."""
    wikitext = (
        '{|\n|+Food complements\n|-\n|Orange\n|Apple\n|-\n'
        '|Bread\n|Pie\n|-\n|Butter\n|Ice cream \n|}'
    )
    expected = [
        ['Orange', 'Apple'],
        ['Bread', 'Pie'],
        ['Butter', 'Ice cream'],
    ]
    self.assertEqual(Table(wikitext).data(), expected)
def test_inline_colspan_and_rowspan(self):
    """Inline row/colspan cells are repeated when ``span=True``."""
    wikitext = (
        '{| class=wikitable\n'
        ' !a !! b !! c !! rowspan = 2 | d \n'
        ' |- \n'
        ' | e || colspan = "2"| f\n'
        '|}'
    )
    expected = [
        ['a', 'b', 'c', 'd'],
        ['e', 'f', 'f', 'd'],
    ]
    self.assertEqual(Table(wikitext).data(span=True), expected)
def test_no_attrs_but_caption():
    """A caption without attributes can be read and replaced in place."""
    old_caption = 'Food complements'
    new_caption = ' C '
    text = (
        '{|\n|+Food complements\n|-\n|Orange\n|Apple\n|-'
        '\n|Bread\n|Pie\n|-\n|Butter\n|Ice cream \n|}'
    )
    table = Table(text)
    assert table.caption == old_caption
    assert table.caption_attrs is None
    table.caption = new_caption
    # Only the caption text may change; the rest of the markup stays put.
    assert table.string == text.replace(old_caption, new_caption)
def test_no_attrs_but_caption(self):
    """A caption without attributes can be read and replaced in place."""
    text = (
        '{|\n|+Food complements\n|-\n|Orange\n|Apple\n|-'
        '\n|Bread\n|Pie\n|-\n|Butter\n|Ice cream \n|}'
    )
    table = Table(text)
    self.assertEqual(table.caption, 'Food complements')
    # Identity check fails more clearly than assertEqual(..., None).
    self.assertIsNone(table.caption_attrs)
    table.caption = ' C '
    # Only the caption text may change; the rest of the markup stays put.
    self.assertEqual(
        table.string,
        text.replace('Food complements', ' C '),
    )
def test_attr_delimiter_cant_be_adjacent_to_cell_delimiter():
    """Couldn't find a logical explanation for MW's behaviour."""
    spaced = Table(
        '{|class=wikitable\n'
        '!a| !!b|c\n'
        '|}'
    )
    assert spaced.data() == [['', 'c']]
    # Remove one space and...
    adjacent = Table(
        '{|class=wikitable\n'
        '!a|!!b|c\n'
        '|}'
    )
    assert adjacent.data() == [['a', 'b|c']]
def test_only_pipes_can_seprate_attributes():
    """According to the note at mw:Help:Tables#Table_headers."""
    # A "!" after the attributes does not end them: it stays in the value.
    bang_separated = (
        '{|class=wikitable\n! style="text-align:left;"! '
        'Item\n! Amount\n! Cost\n|}'
    )
    assert Table(bang_separated).data() == [
        ['style="text-align:left;"! Item', 'Amount', 'Cost'],
    ]
    # A "|" does end the attributes, leaving only the cell text.
    pipe_separated = (
        '{|class=wikitable\n! style="text-align:left;"| '
        'Item\n! Amount\n! Cost\n|}'
    )
    assert Table(pipe_separated).data() == [['Item', 'Amount', 'Cost']]
def test_colspan_and_rowspan_and_span_true(self):
    """Spanning cells fill every position they cover when ``span=True``."""
    wikitext = (
        '{| class="wikitable"\n!colspan= 6 |11\n|-\n'
        '|rowspan="2"|21\n|22\n|23\n|24\n |colspan="2"|25\n|-\n'
        '|31\n|colspan="2"|32\n|33\n|34\n|}'
    )
    header_row = ['11'] * 6
    second_row = ['21', '22', '23', '24', '25', '25']
    third_row = ['21', '31', '32', '32', '33', '34']
    self.assertEqual(
        Table(wikitext).data(span=True),
        [header_row, second_row, third_row],
    )
def test_extra_spaces_have_no_effect(self):
    """Spaces around the cell text are not part of the returned values."""
    wikitext = (
        '{|\n| Orange || Apple || more\n|-\n'
        '| Bread || Pie || more\n|-\n'
        '| Butter || Ice cream || and more\n|}'
    )
    expected = [
        ['Orange', 'Apple', 'more'],
        ['Bread', 'Pie', 'more'],
        ['Butter', 'Ice cream', 'and more'],
    ]
    self.assertEqual(Table(wikitext).data(), expected)
def test_colspan_0(self):
    """A ``colspan=0`` cell is expected to occupy a single column."""
    wikitext = (
        '{|class=wikitable\n'
        '| colspan=0 | a || b\n'
        '|-\n'
        '| c || d\n'
        '|}'
    )
    expected = [['a', 'b'], ['c', 'd']]
    self.assertEqual(Table(wikitext).data(span=True), expected)
def test_growing_downward_growing_cells(self):
    """A ``rowspan=0`` cell is expected to extend into the following row."""
    wikitext = (
        '{|class=wikitable\n'
        '| a || rowspan=0 | b\n'
        '|-\n'
        '| c\n'
        '|}'
    )
    expected = [['a', 'b'], ['c', 'b']]
    self.assertEqual(Table(wikitext).data(span=True), expected)
def test_changing_cell_should_effect_the_table(self):
    """Edits made through a cell object show up in the table's string."""
    t = Table('{|class=wikitable\n|a=b|c\n|}')
    c = t.cells(0, 0)
    c.value = 'v'
    self.assertEqual(c.value, 'v')
    # Use the ``*_attr`` method names, matching the other attribute tests
    # in this file (set_attr / del_attr rather than set / delete).
    c.set_attr('a', 'b2')
    self.assertEqual(t.string, '{|class=wikitable\n|a="b2"|v\n|}')
    c.del_attr('a')
    self.assertEqual(t.string, '{|class=wikitable\n||v\n|}')
    c.set_attr('c', 'd')
    self.assertEqual(t.string, '{|class=wikitable\n| c="d"|v\n|}')
def test_ending_row_group_and_rowspan_0(self):
    """``rowspan=3`` and ``rowspan=0`` cells at the end of a row group.

    The expected data has three rows although the markup defines two: both
    spanning cells reach into a third row, whose remaining position is
    ``None``.
    """
    table = Table(
        '{|class=wikitable\n'
        '| rowspan = 3 | a || rowspan = 0 | b || c\n'
        '|-\n'
        '| d\n'
        '|}'
    )
    # Call ``data`` like every other test in this file (not ``getdata``).
    self.assertEqual(table.data(span=True), [
        ['a', 'b', 'c'],
        ['a', 'b', 'd'],
        ['a', 'b', None],
    ])
def test_ending_row_group(self):
    """A ``rowspan=3`` cell yields a third row padded with ``None``."""
    wikitext = (
        '{|class=wikitable\n'
        '| rowspan = 3 | a || b\n'
        '|-\n'
        '| c\n'
        '|}'
    )
    expected = [
        ['a', 'b'],
        ['a', 'c'],
        ['a', None],
    ]
    self.assertEqual(Table(wikitext).data(span=True), expected)
def test_changing_cell_should_effect_the_table():
    """Edits made through a cell object show up in the table's string."""
    table = Table('{|class=wikitable\n|a=b|c\n|}')
    cell = table.cells(0, 0)

    # Replace the cell text.
    cell.value = 'v'
    assert cell.value == 'v'

    # Overwrite an existing attribute.
    cell.set_attr('a', 'b2')
    assert table.string == '{|class=wikitable\n|a="b2"|v\n|}'

    # Remove it again; the attribute delimiter is expected to remain.
    cell.del_attr('a')
    assert table.string == '{|class=wikitable\n||v\n|}'

    # Add a brand-new attribute.
    cell.set_attr('c', 'd')
    assert table.string == '{|class=wikitable\n| c="d"|v\n|}'
def test_with_optional_rowseprator_on_first_row(self):
    """A ``|-`` line before the first row must not add an empty row."""
    wikitext = (
        '{| class=wikitable | g\n'
        ' |- 132131 |||\n'
        ' | a | b\n'
        ' |-\n'
        ' | c\n'
        '|}'
    )
    expected = [['b'], ['c']]
    self.assertEqual(Table(wikitext).data(), expected)
def test_all_rows_are_on_a_single_line(self):
    """Cells written on one line with ``||`` are split into values."""
    wikitext = (
        '{|\n'
        '|a||b||c\n'
        '|-\n'
        '|d||e||f\n'
        '|-\n'
        '|g||h||i\n'
        '|}'
    )
    expected = [
        ['a', 'b', 'c'],
        ['d', 'e', 'f'],
        ['g', 'h', 'i'],
    ]
    self.assertEqual(Table(wikitext).data(), expected)
def test_with_headers(self):
    """Header cells (``!``) are returned in the data without attributes."""
    wikitext = (
        '{|\n! style="text-align:left;"| Item\n! Amount\n! Cost\n|-\n'
        '|Orange\n|10\n|7.00\n|-\n|Bread\n|4\n|3.00\n|-\n'
        '|Butter\n|1\n|5.00\n|-\n!Total\n|\n|15.00\n|}'
    )
    expected = [
        ['Item', 'Amount', 'Cost'],
        ['Orange', '10', '7.00'],
        ['Bread', '4', '3.00'],
        ['Butter', '1', '5.00'],
        # The last row mixes a header cell with an empty data cell.
        ['Total', '', '15.00'],
    ]
    self.assertEqual(Table(wikitext).data(), expected)
def test_caption_end():
    """Check where the caption text ends for several delimiter layouts."""
    cases = (
        # MW renders the following test input as """
        # <table>
        # <caption>caption</caption>
        # <caption>second caption!</caption>
        # <tbody><tr><td></td></tr></tbody>
        # </table>
        # """ but only one caption is valid in HTML. Most browsers ignore
        # the second caption tag. wikitextparser only returns the first one.
        ('{|\n|+ caption|| second caption!\n|}', " caption"),
        ('{|\n|+style="color:red;"|caption\n|}', "caption"),
        ('{|\n|+caption ! caption\n|}', "caption ! caption"),
        ('{|\n|+caption !! caption\n! header\n|}', "caption !! caption"),
    )
    for wikitext, expected_caption in cases:
        assert Table(wikitext).caption == expected_caption
def test_each_row_on_a_newline(self):
    """Cells written one per line are grouped into rows at each ``|-``."""
    wikitext = (
        '{|\n'
        '|Orange\n'
        '|Apple\n'
        '|-\n'
        '|Bread\n'
        '|Pie\n'
        '|-\n'
        '|Butter\n'
        '|Ice cream \n'
        '|}'
    )
    expected = [
        ['Orange', 'Apple'],
        ['Bread', 'Pie'],
        ['Butter', 'Ice cream'],
    ]
    self.assertEqual(Table(wikitext).data(), expected)
def test_multiline_table(self):
    """Read and edit the attributes found on the table-start line."""
    check = self.assertEqual
    table = Table('{|s\n|a\n|}')

    # A bare attribute name is reported with an empty value.
    check(table.table_attrs, 's')
    check(table.attrs, {'s': ''})
    check(table.has_attr('s'), True)
    check(table.has_attr('n'), False)
    check(table.get_attr('s'), '')

    # Replacing the whole attribute string rewrites the start line.
    table.table_attrs = 'class="wikitable"'
    check(repr(table), "Table('{|class=\"wikitable\"\\n|a\\n|}')")
    check(table.get_attr('class'), 'wikitable')

    # Individual attributes can be overwritten and removed.
    table.set_attr('class', 'sortable')
    check(table.attrs, {'class': 'sortable'})
    table.del_attr('class')
    check(table.attrs, {})
def test_unicode_data(self):
    r"""Note the \u201D character at line 2. wikitextparser/issues/9."""
    wikitext = (
        '{|class=wikitable\n'
        '|align="center" rowspan="1"|A\u201D\n'
        '|align="center" rowspan="1"|B\n'
        '|}'
    )
    expected = [['A”', 'B']]
    self.assertEqual(Table(wikitext).data(), expected)
def test_weird_colspan():
    """Check ``data()`` for empty, blank and non-integer colspan values.

    In the expected rows only the ``" 2a2"`` cell covers two columns; the
    cells with ``""``, ``" "`` and ``"1.5"`` each cover one, and the
    unfilled last position of the second row is ``None``.
    """
    wikitext = (
        '{|class=wikitable\n'
        '! colspan="" | 1 !!colspan=" " | 2 !! 3 !! 4\n'
        '|-\n'
        '| colspan=" 2a2"| a\n'
        '|colspan="1.5"| b\n'
        '|}'
    )
    expected = [
        ['1', '2', '3', '4'],
        ['a', 'a', 'b', None],
    ]
    assert Table(wikitext).data() == expected
def test_inline_colspan_and_rowspan():
    """Inline row/colspan cells are repeated when ``span=True``."""
    wikitext = (
        '{| class=wikitable\n'
        ' !a !! b !! c !! rowspan = 2 | d \n'
        ' |- \n'
        ' | e || colspan = "2"| f\n'
        '|}'
    )
    expected = [
        ['a', 'b', 'c', 'd'],
        ['e', 'f', 'f', 'd'],
    ]
    assert Table(wikitext).data(span=True) == expected
def test_colspan_and_rowspan_and_span_true():
    """Spanning cells fill every position they cover when ``span=True``."""
    wikitext = (
        '{| class="wikitable"\n!colspan= 6 |11\n|-\n'
        '|rowspan="2"|21\n|22\n|23\n|24\n |colspan="2"|25\n|-\n'
        '|31\n|colspan="2"|32\n|33\n|34\n|}'
    )
    expected = [
        ['11', '11', '11', '11', '11', '11'],
        ['21', '22', '23', '24', '25', '25'],
        ['21', '31', '32', '32', '33', '34'],
    ]
    assert Table(wikitext).data(span=True) == expected
def test_extra_spaces_have_no_effect():
    """Spaces around the cell text are not part of the returned values."""
    wikitext = (
        '{|\n| Orange || Apple || more\n|-\n'
        '| Bread || Pie || more\n|-\n'
        '| Butter || Ice cream || and more\n|}'
    )
    expected = [
        ['Orange', 'Apple', 'more'],
        ['Bread', 'Pie', 'more'],
        ['Butter', 'Ice cream', 'and more'],
    ]
    assert Table(wikitext).data() == expected
def test_with_optional_rowseprator_on_first_row():
    """A ``|-`` line before the first row must not add an empty row."""
    wikitext = (
        '{| class=wikitable | g\n'
        ' |- 132131 |||\n'
        ' | a | b\n'
        ' |-\n'
        ' | c\n'
        '|}'
    )
    expected = [['b'], ['c']]
    assert Table(wikitext).data() == expected
def test_ending_row_group_and_rowspan_0():
    """``rowspan=3`` and ``rowspan=0`` cells at the end of a row group.

    The expected data has three rows although the markup defines two: both
    spanning cells reach into a third row, whose remaining position is
    ``None``.
    """
    wikitext = (
        '{|class=wikitable\n'
        '| rowspan = 3 | a || rowspan = 0 | b || c\n'
        '|-\n'
        '| d\n'
        '|}'
    )
    expected = [
        ['a', 'b', 'c'],
        ['a', 'b', 'd'],
        ['a', 'b', None],
    ]
    assert Table(wikitext).data(span=True) == expected