示例#1
0
 def test_no_caption(self):
     """A ``|+`` line that follows the first cell is not the caption.

     For such a table both ``caption`` and ``caption_attrs`` are None.
     Assigning ``caption`` adds a new ``|+`` line right after the
     table-start line and leaves the later ``|+ ignore`` text untouched.
     """
     table = Table('{| class="wikitable"\n|a\n|+ ignore\n|}')
     # assertIsNone checks identity with the None singleton; it is
     # stricter than an equality check and fails with a clearer message.
     self.assertIsNone(table.caption)
     self.assertIsNone(table.caption_attrs)
     table.caption = 'foo'
     self.assertEqual(table.string,
                      '{| class="wikitable"\n|+foo\n|a\n|+ ignore\n|}')
示例#2
0
 def test_longer_text_and_only_rstrip(self):
     """Multi-line cell values lose trailing whitespace only.

     Inner newlines and the leading newline of the second cell are
     expected to be kept as they are.
     """
     wikitext = (
         '{|\n'
         '|multi\nline\ntext. \n\n2nd paragraph. \n'
         '|\n* ulli1\n* ulli2\n* ulli3\n'
         '|}'
     )
     first_cell = 'multi\nline\ntext. \n\n2nd paragraph.'
     second_cell = '\n* ulli1\n* ulli2\n* ulli3'
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(), [[first_cell, second_cell]])
示例#3
0
def _extract_table(table: wtp.Table) -> Optional[dict]:
    """Convert a parsed wiki table into one header row and its data rows.

    Every cell is passed through ``_convert_markup`` and stored as a dict
    with the keys ``'text'`` and ``'entities'``.

    :param table: the parsed table to extract.
    :return: ``{'header': [...], 'data': [[...], ...]}``, or ``None`` when
        the table cannot be read, when any row has fewer than 2 or more
        than 100 columns, or when fewer than 2 data rows remain.
    :raises ParsingTimeoutException: re-raised unchanged when reading the
        table data raises it.
    """
    try:
        rows = table.data(strip=True, span=True)
        cells = table.cells(span=True)
        # span=False keeps the rows as written, i.e. without the copies
        # that row-/colspan expansion adds.
        unspanned_rows = table.data(strip=True, span=False)
    except ParsingTimeoutException:
        # Never swallow a timeout; let it reach the caller.
        raise
    except Exception:
        # Any other failure means the table is unusable. KeyboardInterrupt
        # derives from BaseException, so it is not caught here and needs
        # no special handling.
        return None

    row_header = []
    row_data = []
    for row_idx, row in enumerate(rows):
        if not 2 <= len(row) <= 100:
            # ignore tables with only one or more than 100 columns
            # (likely irrelevant or markup error)
            return None
        parsed_cells = []
        for cell in row:
            plaintext, entities = _convert_markup(str(cell))
            parsed_cells.append({'text': plaintext, 'entities': entities})
        if _is_header_row(cells, row_idx):
            # A later header row replaces an earlier one.
            row_header = parsed_cells
        elif (row_idx < len(unspanned_rows)
              and len(row) == len(unspanned_rows[row_idx])):
            # only use rows that are not influenced by row-/colspan
            row_data.append(parsed_cells)

    if len(row_data) < 2:
        return None  # ignore tables with less than 2 data rows
    return {'header': row_header, 'data': row_data}
示例#4
0
 def test_with_caption(self):
     """The ``|+`` caption line is not part of the table's data."""
     wikitext = (
         '{|\n'
         '|+Food complements\n'
         '|-\n'
         '|Orange\n'
         '|Apple\n'
         '|-\n'
         '|Bread\n'
         '|Pie\n'
         '|-\n'
         '|Butter\n'
         '|Ice cream \n'
         '|}'
     )
     expected_rows = [
         ['Orange', 'Apple'],
         ['Bread', 'Pie'],
         ['Butter', 'Ice cream'],
     ]
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(), expected_rows)
示例#5
0
 def test_inline_colspan_and_rowspan(self):
     """Span attributes on inline (``!!`` / ``||``) cells are expanded.

     With ``span=True`` the rowspan cell ``d`` is repeated in the second
     row and the colspan cell ``f`` fills two columns.
     """
     wikitext = ('{| class=wikitable\n'
                 ' !a !! b !!  c !! rowspan = 2 | d \n'
                 ' |- \n'
                 ' | e || colspan = "2"| f\n'
                 '|}')
     first_row = ['a', 'b', 'c', 'd']
     second_row = ['e', 'f', 'f', 'd']
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(span=True), [first_row, second_row])
示例#6
0
def test_no_attrs_but_caption():
    """A caption without attributes can be read and replaced in place."""
    old_caption = 'Food complements'
    new_caption = ' C '
    source = (
        '{|\n'
        '|+Food complements\n'
        '|-\n'
        '|Orange\n'
        '|Apple\n'
        '|-\n'
        '|Bread\n'
        '|Pie\n'
        '|-\n'
        '|Butter\n'
        '|Ice cream \n'
        '|}'
    )
    parsed = Table(source)
    assert parsed.caption == old_caption
    assert parsed.caption_attrs is None
    # Only the caption text may change; the rest of the markup stays put.
    parsed.caption = new_caption
    assert parsed.string == source.replace(old_caption, new_caption)
示例#7
0
 def test_no_attrs_but_caption(self):
     """A caption without attributes can be read and replaced in place.

     ``caption_attrs`` is None when the caption line carries no
     attributes, and assigning ``caption`` changes only the caption text.
     """
     text = ('{|\n|+Food complements\n|-\n|Orange\n|Apple\n|-'
             '\n|Bread\n|Pie\n|-\n|Butter\n|Ice cream \n|}')
     table = Table(text)
     self.assertEqual(table.caption, 'Food complements')
     # assertIsNone checks identity with the None singleton; it is
     # stricter than an equality check and fails with a clearer message.
     self.assertIsNone(table.caption_attrs)
     table.caption = ' C '
     self.assertEqual(table.string, text.replace('Food complements', ' C '))
示例#8
0
def test_attr_delimiter_cant_be_adjacent_to_cell_delimiter():
    """Compare ``| !!`` with ``|!!`` inside a header line.

    The expected values follow MediaWiki's behaviour, for which no
    logical explanation could be found.
    """
    with_space = Table('{|class=wikitable\n!a| !!b|c\n|}')
    assert with_space.data() == [['', 'c']]

    # Same markup minus the single space between '|' and '!!'.
    without_space = Table('{|class=wikitable\n!a|!!b|c\n|}')
    assert without_space.data() == [['a', 'b|c']]
示例#9
0
def test_only_pipes_can_seprate_attributes():
    """Only ``|`` separates a header cell's attributes from its content.

    A ``!`` in that position stays part of the cell text. See the note
    at mw:Help:Tables#Table_headers.
    """
    bang_markup = (
        '{|class=wikitable\n'
        '! style="text-align:left;"! Item\n'
        '! Amount\n'
        '! Cost\n'
        '|}'
    )
    bang_expected = [['style="text-align:left;"! Item', 'Amount', 'Cost']]
    assert Table(bang_markup).data() == bang_expected

    pipe_markup = (
        '{|class=wikitable\n'
        '! style="text-align:left;"| Item\n'
        '! Amount\n'
        '! Cost\n'
        '|}'
    )
    pipe_expected = [['Item', 'Amount', 'Cost']]
    assert Table(pipe_markup).data() == pipe_expected
示例#10
0
 def test_colspan_and_rowspan_and_span_true(self):
     """``data(span=True)`` repeats cells across their row/col spans."""
     wikitext = (
         '{| class="wikitable"\n'
         '!colspan= 6 |11\n'
         '|-\n'
         '|rowspan="2"|21\n'
         '|22\n'
         '|23\n'
         '|24\n'
         '  |colspan="2"|25\n'
         '|-\n'
         '|31\n'
         '|colspan="2"|32\n'
         '|33\n'
         '|34\n'
         '|}'
     )
     expected_rows = [
         ['11'] * 6,  # one header cell spanning all six columns
         ['21', '22', '23', '24', '25', '25'],
         ['21', '31', '32', '32', '33', '34'],
     ]
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(span=True), expected_rows)
示例#11
0
 def test_extra_spaces_have_no_effect(self):
     """Padding spaces around inline cell values are stripped."""
     wikitext = (
         '{|\n'
         '|  Orange    ||   Apple   ||   more\n'
         '|-\n'
         '|   Bread    ||   Pie     ||   more\n'
         '|-\n'
         '|   Butter   || Ice cream ||  and more\n'
         '|}'
     )
     expected_rows = [
         ['Orange', 'Apple', 'more'],
         ['Bread', 'Pie', 'more'],
         ['Butter', 'Ice cream', 'and more'],
     ]
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(), expected_rows)
示例#12
0
 def test_colspan_0(self):
     """A cell with ``colspan=0`` still occupies exactly one column."""
     wikitext = '\n'.join((
         '{|class=wikitable',
         '| colspan=0 | a || b',
         '|-',
         '| c || d',
         '|}',
     ))
     rows = Table(wikitext).data(span=True)
     self.assertEqual(rows, [['a', 'b'], ['c', 'd']])
示例#13
0
 def test_growing_downward_growing_cells(self):
     """A ``rowspan=0`` cell extends down into the following row."""
     wikitext = '\n'.join((
         '{|class=wikitable',
         '| a || rowspan=0 | b',
         '|-',
         '| c',
         '|}',
     ))
     rows = Table(wikitext).data(span=True)
     # 'b' is repeated in the second row although only 'c' is written.
     self.assertEqual(rows, [['a', 'b'], ['c', 'b']])
示例#14
0
 def test_changing_cell_should_effect_the_table(self):
     """Edits made through a cell object show up in the table's string.

     The cell's value is replaced, then an attribute is updated, removed
     and a new one is added; the table string is checked after each
     attribute change.
     """
     t = Table('{|class=wikitable\n|a=b|c\n|}')
     c = t.cells(0, 0)
     c.value = 'v'
     self.assertEqual(c.value, 'v')
     # Use the set_attr/del_attr spelling that the other attribute tests
     # in this file call, instead of the older set/delete names.
     c.set_attr('a', 'b2')
     self.assertEqual(t.string, '{|class=wikitable\n|a="b2"|v\n|}')
     c.del_attr('a')
     self.assertEqual(t.string, '{|class=wikitable\n||v\n|}')
     c.set_attr('c', 'd')
     self.assertEqual(t.string, '{|class=wikitable\n| c="d"|v\n|}')
示例#15
0
 def test_ending_row_group_and_rowspan_0(self):
     """Combine ``rowspan = 3`` and ``rowspan = 0`` in a two-line table.

     The expected data has three rows: 'a' and 'b' are repeated in each
     of them, and the position with no cell in the last row is None.
     """
     table = Table('{|class=wikitable\n'
                   '| rowspan = 3 | a || rowspan = 0 | b || c\n'
                   '|-\n'
                   '| d\n'
                   '|}')
     # `data` is the accessor every other table test in this file uses;
     # `getdata` is the older spelling of the same call.
     self.assertEqual(table.data(span=True), [
         ['a', 'b', 'c'],
         ['a', 'b', 'd'],
         ['a', 'b', None],
     ])
示例#16
0
 def test_ending_row_group(self):
     """A ``rowspan = 3`` cell yields three rows from two written ones.

     The position that has no cell in the last row is reported as None.
     """
     wikitext = '\n'.join((
         '{|class=wikitable',
         '| rowspan = 3 | a || b',
         '|-',
         '| c',
         '|}',
     ))
     expected_rows = [['a', 'b'], ['a', 'c'], ['a', None]]
     rows = Table(wikitext).data(span=True)
     self.assertEqual(rows, expected_rows)
示例#17
0
def test_changing_cell_should_effect_the_table():
    """Edits made through a cell object show up in the table's string."""
    parent = Table('{|class=wikitable\n|a=b|c\n|}')
    cell = parent.cells(0, 0)

    # Replace the cell's content.
    cell.value = 'v'
    assert cell.value == 'v'

    # Overwrite an existing attribute.
    cell.set_attr('a', 'b2')
    assert parent.string == '{|class=wikitable\n|a="b2"|v\n|}'

    # Remove it again.
    cell.del_attr('a')
    assert parent.string == '{|class=wikitable\n||v\n|}'

    # Add an attribute to a cell that currently has none.
    cell.set_attr('c', 'd')
    assert parent.string == '{|class=wikitable\n| c="d"|v\n|}'
示例#18
0
 def test_with_optional_rowseprator_on_first_row(self):
     """A ``|-`` line before the first row does not add an empty row."""
     wikitext = '\n'.join((
         '{| class=wikitable | g',
         ' |- 132131 |||',
         '  | a | b',
         ' |-',
         '  | c',
         '|}',
     ))
     rows = Table(wikitext).data()
     self.assertEqual(rows, [['b'], ['c']])
示例#19
0
 def test_all_rows_are_on_a_single_line(self):
     """Rows whose cells share one line (``||``) are split into cells."""
     wikitext = '{|\n|a||b||c\n|-\n|d||e||f\n|-\n|g||h||i\n|}'
     # Each row is three single-letter cells.
     expected_rows = [list('abc'), list('def'), list('ghi')]
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(), expected_rows)
示例#20
0
 def test_with_headers(self):
     """Header cells (``!``) are returned as ordinary data values.

     The header attributes are dropped and an empty cell yields ''.
     """
     wikitext = (
         '{|\n'
         '! style="text-align:left;"| Item\n'
         '! Amount\n'
         '! Cost\n'
         '|-\n'
         '|Orange\n'
         '|10\n'
         '|7.00\n'
         '|-\n'
         '|Bread\n'
         '|4\n'
         '|3.00\n'
         '|-\n'
         '|Butter\n'
         '|1\n'
         '|5.00\n'
         '|-\n'
         '!Total\n'
         '|\n'
         '|15.00\n'
         '|}'
     )
     header = ['Item', 'Amount', 'Cost']
     body = [
         ['Orange', '10', '7.00'],
         ['Bread', '4', '3.00'],
         ['Butter', '1', '5.00'],
     ]
     footer = ['Total', '', '15.00']
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(), [header, *body, footer])
示例#21
0
def test_caption_end():
    """Pin down where the text of a ``|+`` caption line ends."""
    # For the first input MediaWiki renders
    #   <table>
    #    <caption>caption</caption>
    #    <caption>second caption!</caption>
    #    <tbody><tr><td></td></tr></tbody>
    #   </table>
    # but HTML allows a single caption only and most browsers ignore the
    # second tag. wikitextparser returns just the first caption.
    double_pipe = Table('{|\n|+ caption|| second caption!\n|}')
    assert double_pipe.caption == " caption"

    # Text before a single pipe is the caption's attributes.
    with_attrs = Table('{|\n|+style="color:red;"|caption\n|}')
    assert with_attrs.caption == "caption"

    # Neither '!' nor '!!' ends a caption.
    single_bang = Table('{|\n|+caption ! caption\n|}')
    assert single_bang.caption == "caption ! caption"

    double_bang = Table('{|\n|+caption !! caption\n! header\n|}')
    assert double_bang.caption == "caption !! caption"
示例#22
0
 def test_each_row_on_a_newline(self):
     """Cells written one per line are grouped into rows by ``|-``."""
     wikitext = (
         '{|\n'
         '|Orange\n|Apple\n|-\n'
         '|Bread\n|Pie\n|-\n'
         '|Butter\n|Ice cream \n'
         '|}'
     )
     expected_rows = [
         ['Orange', 'Apple'],
         ['Bread', 'Pie'],
         ['Butter', 'Ice cream'],
     ]
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(), expected_rows)
示例#23
0
 def test_multiline_table(self):
     """Exercise reading and changing the attributes of a table."""
     tbl = Table('{|s\n|a\n|}')

     # A bare attribute name has an empty string as its value.
     self.assertEqual(tbl.table_attrs, 's')
     self.assertEqual(tbl.attrs, {'s': ''})
     self.assertEqual(tbl.has_attr('s'), True)
     self.assertEqual(tbl.has_attr('n'), False)
     self.assertEqual(tbl.get_attr('s'), '')

     # Replace the whole attribute string at once.
     tbl.table_attrs = 'class="wikitable"'
     self.assertEqual(repr(tbl),
                      "Table('{|class=\"wikitable\"\\n|a\\n|}')")
     self.assertEqual(tbl.get_attr('class'), 'wikitable')

     # Change, then remove, a single attribute.
     tbl.set_attr('class', 'sortable')
     self.assertEqual(tbl.attrs, {'class': 'sortable'})
     tbl.del_attr('class')
     self.assertEqual(tbl.attrs, {})
示例#24
0
 def test_unicode_data(self):
     """A U+201D character in a cell is returned unchanged.

     See wikitextparser/issues/9.
     """
     wikitext = (
         '{|class=wikitable\n'
         '|align="center" rowspan="1"|A\u201D\n'
         '|align="center" rowspan="1"|B\n'
         '|}'
     )
     parsed = Table(wikitext)
     self.assertEqual(parsed.data(), [['A”', 'B']])
示例#25
0
def test_weird_colspan():
    """Check how unusual ``colspan`` values are interpreted.

    In the expected data the cells with ``""``, ``" "`` and ``"1.5"``
    take one column each, while ``" 2a2"`` takes two.
    """
    wikitext = '\n'.join([
        '{|class=wikitable',
        '! colspan="" | 1 !!colspan=" " | 2 !! 3 !! 4',
        '|-',
        '| colspan=" 2a2"| a',
        '|colspan="1.5"| b',
        '|}',
    ])
    expected_rows = [
        ['1', '2', '3', '4'],
        ['a', 'a', 'b', None],
    ]
    assert Table(wikitext).data() == expected_rows
示例#26
0
def test_inline_colspan_and_rowspan():
    """Span attributes on inline (``!!`` / ``||``) cells are expanded."""
    wikitext = '\n'.join((
        '{| class=wikitable',
        ' !a !! b !!  c !! rowspan = 2 | d ',
        ' |- ',
        ' | e || colspan = "2"| f',
        '|}',
    ))
    expected_rows = [
        ['a', 'b', 'c', 'd'],
        ['e', 'f', 'f', 'd'],  # 'f' spans two columns, 'd' two rows
    ]
    assert Table(wikitext).data(span=True) == expected_rows
示例#27
0
def test_colspan_and_rowspan_and_span_true():
    """``data(span=True)`` repeats cells across their row/col spans."""
    wikitext = (
        '{| class="wikitable"\n'
        '!colspan= 6 |11\n'
        '|-\n'
        '|rowspan="2"|21\n'
        '|22\n'
        '|23\n'
        '|24\n'
        '  |colspan="2"|25\n'
        '|-\n'
        '|31\n'
        '|colspan="2"|32\n'
        '|33\n'
        '|34\n'
        '|}'
    )
    expected_rows = [
        ['11', '11', '11', '11', '11', '11'],
        ['21', '22', '23', '24', '25', '25'],
        ['21', '31', '32', '32', '33', '34'],
    ]
    assert Table(wikitext).data(span=True) == expected_rows
示例#28
0
def test_extra_spaces_have_no_effect():
    """Padding spaces around inline cell values are stripped."""
    wikitext = (
        '{|\n'
        '|  Orange    ||   Apple   ||   more\n'
        '|-\n'
        '|   Bread    ||   Pie     ||   more\n'
        '|-\n'
        '|   Butter   || Ice cream ||  and more\n'
        '|}'
    )
    expected_rows = [
        ['Orange', 'Apple', 'more'],
        ['Bread', 'Pie', 'more'],
        ['Butter', 'Ice cream', 'and more'],
    ]
    assert Table(wikitext).data() == expected_rows
示例#29
0
def test_with_optional_rowseprator_on_first_row():
    """A ``|-`` line before the first row does not add an empty row."""
    wikitext = '\n'.join((
        '{| class=wikitable | g',
        ' |- 132131 |||',
        '  | a | b',
        ' |-',
        '  | c',
        '|}',
    ))
    parsed = Table(wikitext)
    assert parsed.data() == [['b'], ['c']]
示例#30
0
def test_ending_row_group_and_rowspan_0():
    """Combine ``rowspan = 3`` and ``rowspan = 0`` in a two-line table.

    The expected data has three rows; the position with no cell in the
    last row is None.
    """
    wikitext = '\n'.join((
        '{|class=wikitable',
        '| rowspan = 3 | a || rowspan = 0 | b || c',
        '|-',
        '| d',
        '|}',
    ))
    expected_rows = [
        ['a', 'b', 'c'],
        ['a', 'b', 'd'],
        ['a', 'b', None],
    ]
    assert Table(wikitext).data(span=True) == expected_rows