Exemplo n.º 1
0
 def test_multiple_header_rows(self):
     """Round-trip a frame with two header rows via to_html/read_html."""
     # Issue #13434
     rows = [("Hillary", 68, "D"), ("Bernie", 74, "D"), ("Donald", 69, "R")]
     top_level = ["Unnamed: 0_level_0", "Age", "Party"]
     bottom_level = ["Name", "Unnamed: 1_level_1", "Unnamed: 2_level_1"]

     expected_df = DataFrame(data=rows)
     # Assigning a list of two label lists gives the frame two column levels.
     expected_df.columns = [top_level, bottom_level]

     rendered = expected_df.to_html(index=False)
     parsed_tables = read_html(rendered)
     tm.assert_frame_equal(expected_df, parsed_tables[0])
Exemplo n.º 2
0
    def test_keep_default_na(self):
        """Check how ``keep_default_na`` treats 'N/A' and 'NA' cells.

        The same table is parsed twice: with ``keep_default_na=False`` the
        cells are expected to come back as the strings 'N/A' and 'NA'; with
        ``keep_default_na=True`` both are expected to be NaN.
        """
        # The header cell sits inside a balanced <tr>...</tr> so the fixture
        # is well-formed and does not depend on parser error recovery.
        html_data = """<table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>"""

        # Default NA markers disabled: the text is kept verbatim.
        expected_df = DataFrame({'a': ['N/A', 'NA']})
        html_df = read_html(html_data, keep_default_na=False)[0]
        tm.assert_frame_equal(expected_df, html_df)

        # Default NA markers enabled: both cells are read as missing.
        expected_df = DataFrame({'a': [np.nan, np.nan]})
        html_df = read_html(html_data, keep_default_na=True)[0]
        tm.assert_frame_equal(expected_df, html_df)
Exemplo n.º 3
0
    def test_na_values(self):
        """Check that a value listed in ``na_values`` is read back as NaN."""
        # GH 13461
        # The header cell sits inside a balanced <tr>...</tr> so the fixture
        # is well-formed and does not depend on parser error recovery.
        html_data = """<table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> 0.763</td>
                            </tr>
                            <tr>
                            <td> 0.244</td>
                            </tr>
                        </tbody>
                    </table>"""

        # Only 0.244 is declared as an NA marker; 0.763 must survive as-is.
        expected_df = DataFrame({'a': [0.763, np.nan]})
        html_df = read_html(html_data, na_values=[0.244])[0]
        tm.assert_frame_equal(expected_df, html_df)
Exemplo n.º 4
0
    def test_converters(self):
        """Check that a per-column converter is applied to parsed cells."""
        # GH 13461
        # The header cell sits inside a balanced <tr>...</tr> so the fixture
        # is well-formed and does not depend on parser error recovery.
        html_data = """<table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> 0.763</td>
                            </tr>
                            <tr>
                            <td> 0.244</td>
                            </tr>
                        </tbody>
                    </table>"""

        # With ``str`` as the converter for column 'a' the values are
        # expected to stay as strings rather than being parsed as floats.
        expected_df = DataFrame({'a': ['0.763', '0.244']})
        html_df = read_html(html_data, converters={'a': str})[0]
        tm.assert_frame_equal(expected_df, html_df)
Exemplo n.º 5
0
 def read_html(self, *args, **kwargs):
     """Call the module-level ``read_html`` with ``self.flavor`` forced.

     Any ``flavor`` supplied by the caller is overwritten.
     """
     forced = dict(kwargs, flavor=self.flavor)
     return read_html(*args, **forced)
Exemplo n.º 6
0
 def read_html(self, *args, **kwargs):
     """Call the module-level ``read_html``, defaulting to ``self.flavor``.

     A ``flavor`` explicitly supplied by the caller is left untouched.
     """
     if 'flavor' not in kwargs:
         kwargs['flavor'] = self.flavor
     return read_html(*args, **kwargs)
Exemplo n.º 7
0
 def test_bool_header_arg(self):
     """A boolean ``header`` argument must make read_html raise TypeError."""
     # GH 6114
     for bool_header in (True, False):
         with pytest.raises(TypeError):
             read_html(self.spam_data, header=bool_header)
Exemplo n.º 8
0
def test_same_ordering():
    """Tables parsed with the lxml and bs4 flavors must compare equal."""
    _skip_if_none_of(['bs4', 'lxml', 'html5lib'])
    markup_path = os.path.join(DATA_PATH, 'valid_markup.html')

    # Parse the same file once per flavor, lxml first, then bs4.
    dfs_lxml, dfs_bs4 = [
        read_html(markup_path, index_col=0, flavor=[flavor_name])
        for flavor_name in ('lxml', 'bs4')
    ]
    assert_framelist_equal(dfs_lxml, dfs_bs4)
Exemplo n.º 9
0
def test_invalid_flavor():
    """An unrecognised ``flavor`` must make read_html raise ValueError."""
    bogus_flavor = 'not a* valid**++ flavor'
    with pytest.raises(ValueError):
        read_html('google.com', 'google', flavor=bogus_flavor)