Exemplo n.º 1
0
 def test_preserve_html_None(self):
     """Check that ``preserve_html=True`` keeps the inline markup of each cell.

     The document is imported a second time with ``preserve_html=False``;
     that table is only built, none of its values are asserted here.
     """
     html = dedent('''
     <html>
       <body>
         <table>
           <tr>
             <td><b>f1</b></td>
             <td>f2</td>
             <td>f3</td>
           </tr>
           <tr>
             <td><i>r0f1</i></td>
             <td><i>r0f2</i></td>
             <td><i>r0f3</i></td>
           </tr>
         </table>
       </body>
     </html>
     ''').encode('utf-8')
     table = rows.import_from_html(
         BytesIO(html), encoding='utf-8', preserve_html=True)
     table2 = rows.import_from_html(
         BytesIO(html), encoding='utf-8', preserve_html=False)

     # Field name -> markup expected in the first data row.
     expected_cells = (
         ('f1', '<i>r0f1</i>'),
         ('f2', '<i>r0f2</i>'),
         ('f3', '<i>r0f3</i>'),
     )
     for field_name, expected in expected_cells:
         self.assertEqual(getattr(table[0], field_name), expected)
Exemplo n.º 2
0
 def test_preserve_html_None(self):
     """Import an inline table with ``preserve_html=True`` and check the cells.

     A second import with ``preserve_html=False`` is performed as well, but
     its result is not inspected.
     """
     html = dedent("""
     <html>
       <body>
         <table>
           <tr>
             <td><b>f1</b></td>
             <td>f2</td>
             <td>f3</td>
           </tr>
           <tr>
             <td><i>r0f1</i></td>
             <td><i>r0f2</i></td>
             <td><i>r0f3</i></td>
           </tr>
         </table>
       </body>
     </html>
     """).encode("utf-8")
     import_options = {"encoding": "utf-8"}
     table = rows.import_from_html(
         BytesIO(html), preserve_html=True, **import_options
     )
     table2 = rows.import_from_html(
         BytesIO(html), preserve_html=False, **import_options
     )

     # Every cell of the first data row must still carry its <i> wrapper.
     self.assertEqual(table[0].f1, "<i>r0f1</i>")
     self.assertEqual(table[0].f2, "<i>r0f2</i>")
     self.assertEqual(table[0].f3, "<i>r0f3</i>")
def extract_data():
    """Yield one dict per data row of the two tables on the Receita Federal page.

    Rows whose ``codigo`` contains ". " are treated as category headers: the
    title-cased text is split at the first ". " into ``codigo_categoria`` and
    ``categoria``, remembered for the rows that follow, and the header itself
    is not yielded. For every other row ``codigo`` is converted to ``int``
    (dashes removed), the current category values are attached and
    ``qualificacao`` is split into a list of stripped items.
    """
    url = "https://www.receita.fazenda.gov.br/pessoajuridica/cnpj/tabelas/natjurqualificaresponsavel.htm"
    # NOTE(review): certificate verification is disabled here - confirm this is intentional.
    response = requests.get(url, verify=False)
    tables = [
        rows.import_from_html(
            io.BytesIO(response.content), encoding=response.encoding, index=table_index, ignore_colspan=False
        )
        for table_index in (0, 1)
    ]

    categoria = codigo_categoria = None
    for raw_row in chain(*tables):
        row = {key: clear_text(value) for key, value in raw_row._asdict().items()}

        codigo = row["codigo"]
        if ". " in codigo:
            # Category header: remember it and move on without yielding.
            codigo_categoria, _, categoria = codigo.title().partition(". ")
            continue

        row["codigo"] = int(codigo.replace("-", ""))
        row["categoria"] = categoria
        row["codigo_categoria"] = codigo_categoria
        qualificacoes = row["qualificacao"].replace(" ou ", ", ").split(",")
        row["qualificacao"] = [item.strip() for item in qualificacoes]

        yield row
Exemplo n.º 4
0
 def test_preserve_html_None(self):
     """With ``preserve_html=True`` the first data row keeps its ``<i>`` tags.

     The ``preserve_html=False`` import is executed too; its table is not
     checked by this test.
     """
     html = dedent('''
     <html>
       <body>
         <table>
           <tr>
             <td><b>f1</b></td>
             <td>f2</td>
             <td>f3</td>
           </tr>
           <tr>
             <td><i>r0f1</i></td>
             <td><i>r0f2</i></td>
             <td><i>r0f3</i></td>
           </tr>
         </table>
       </body>
     </html>
     ''').encode('utf-8')
     preserved = BytesIO(html)
     table = rows.import_from_html(preserved,
                                   encoding='utf-8',
                                   preserve_html=True)
     stripped = BytesIO(html)
     table2 = rows.import_from_html(stripped,
                                    encoding='utf-8',
                                    preserve_html=False)
     self.assertEqual(
         [table[0].f1, table[0].f2, table[0].f3],
         ['<i>r0f1</i>', '<i>r0f2</i>', '<i>r0f3</i>'],
     )
Exemplo n.º 5
0
 def retrieve_legislators(self, url):
     """Fetch *url* through ``BaseCollector.retrieve_uri`` and parse it as HTML.

     The page is requested with ``post_process=False`` and
     ``force_encoding='utf-8'``, re-encoded as UTF-8 bytes and handed to
     ``rows.import_from_html`` with ``preserve_html=True``; the resulting
     table is returned.
     """
     page = BaseCollector.retrieve_uri(
         self, url, post_process=False, force_encoding='utf-8')
     payload = BytesIO(page.encode('utf-8'))
     return rows.import_from_html(payload, preserve_html=True)
Exemplo n.º 6
0
    def test_ignore_colspan(self):
        """Import the colspan fixture with and without ``ignore_colspan``.

        With ``ignore_colspan=True`` the table has two fields and two rows;
        with ``ignore_colspan=False`` the import must raise ``ValueError``
        with the message 'Number of fields differ'.
        """
        filename = 'tests/data/colspan-table.html'

        # Context managers guarantee the fixture is closed, even on failure.
        with open(filename) as fobj:
            table = rows.import_from_html(fobj, ignore_colspan=True)
        self.assertEqual(set(table.fields.keys()), set(['field1', 'field2']))
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].field1, 'row1field1')
        self.assertEqual(table[0].field2, 'row1field2')
        self.assertEqual(table[1].field1, 'row2field1')
        self.assertEqual(table[1].field2, 'row2field2')

        with open(filename) as fobj:
            with self.assertRaises(ValueError) as raises:
                rows.import_from_html(fobj, ignore_colspan=False)
        # ``args[0]`` is available on Python 2 and 3; ``.message`` only
        # exists on Python 2 exceptions.
        self.assertEqual(raises.exception.args[0], 'Number of fields differ')
Exemplo n.º 7
0
    def parse_budget(self, year, action):
        """Build a table of budget rows from the page currently in the browser.

        The table at index 10 of ``self.browser.html`` is imported with the
        monetary columns forced to ``BRDecimalField``. Rows whose ``elemento``
        equals "TOTAL" are dropped; every remaining row receives ``ano``
        (*year*), ``codigo_acao`` (*action*) and ``estado`` ("SP"). The
        collected dicts are returned through ``rows.import_from_dicts``.
        """
        money_columns = (
            "dotacao_inicial",
            "dotacao_atual",
            "empenhado",
            "liquidado",
            "pago",
            "pago_restos",
        )
        page = io.BytesIO(self.browser.html.encode("utf-8"))
        table = rows.import_from_html(
            page,
            index=10,
            force_types={name: BRDecimalField for name in money_columns},
        )

        extra_columns = {"ano": year, "codigo_acao": action, "estado": "SP"}
        result = []
        for entry in table:
            if entry.elemento != "TOTAL":
                record = entry._asdict()
                record.update(extra_columns)
                result.append(record)

        return rows.import_from_dicts(result)
Exemplo n.º 8
0
    def test_ignore_colspan(self):
        """Import the colspan fixture with and without ``ignore_colspan``.

        With ``ignore_colspan=True`` the table has two fields and two rows;
        with ``ignore_colspan=False`` the import must raise ``ValueError``
        with the message 'Number of fields differ'.
        """
        filename = 'tests/data/colspan-table.html'

        # Context managers guarantee the fixture is closed, even on failure.
        with open(filename, mode='rb') as fobj:
            table = rows.import_from_html(fobj, ignore_colspan=True)
        self.assertEqual(set(table.fields.keys()), set(['field1', 'field2']))
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].field1, 'row1field1')
        self.assertEqual(table[0].field2, 'row1field2')
        self.assertEqual(table[1].field1, 'row2field1')
        self.assertEqual(table[1].field2, 'row2field2')

        with open(filename, mode='rb') as fobj:
            with self.assertRaises(ValueError) as raises:
                rows.import_from_html(fobj, ignore_colspan=False)
        self.assertEqual(raises.exception.args[0], 'Number of fields differ')
Exemplo n.º 9
0
def download_years():
    """Return the ``year`` value of every row of the table found at URL_YEARS.

    The page is downloaded with ``requests`` and parsed as UTF-8 HTML.
    NOTE(review): the original docstring says the years are integers; that
    depends on the field type detected by ``rows`` - confirm.
    """
    page = requests.get(URL_YEARS).content
    games = rows.import_from_html(BytesIO(page), encoding='utf-8')
    years = []
    for game in games:
        years.append(game.year)
    return years
Exemplo n.º 10
0
    def test_extract_properties(self):
        """Import the properties fixture with ``properties=True``.

        Expects an extra ``properties`` JSON field holding, for each row, the
        ``class`` and ``data-test`` attributes listed below.
        """
        filename = 'tests/data/properties-table.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename) as fobj:
            table = rows.import_from_html(fobj, properties=True)

        # ``list(...)`` keeps the comparison valid when keys()/values() return
        # views (Python 3) as well as when they return lists (Python 2).
        self.assertEqual(list(table.fields.keys()),
                         ['field1', 'field2', 'properties'])
        self.assertEqual(list(table.fields.values()), [
            rows.fields.TextField, rows.fields.TextField, rows.fields.JSONField
        ])
        properties_1 = {
            'class': 'some-class another-class',
            'data-test': 'value',
        }
        properties_2 = {
            'class': 'css-class',
            'data-test': 'value2',
        }
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].field1, 'row1field1')
        self.assertEqual(table[0].field2, 'row1field2')
        self.assertEqual(table[0].properties, properties_1)
        self.assertEqual(table[1].field1, 'row2field1')
        self.assertEqual(table[1].field2, 'row2field2')
        self.assertEqual(table[1].properties, properties_2)
Exemplo n.º 11
0
    def convert(self):
        """Convert the submitted file or URL to the requested output format.

        Reads ``convert_url``, ``convert_file`` and ``type_to`` from
        ``self.cleaned_data``. An uploaded file is saved under
        ``settings.MEDIA_ROOT``, imported with the ``rows.import_from_<ext>``
        function chosen from its extension, and removed afterwards. Otherwise
        the URL is downloaded and imported as HTML with
        ``preserve_html=True``. In both cases the data is exported with
        ``rows.export_to_<type_to>`` into a ``StringIO`` buffer, which is
        returned.

        Raises:
            AttributeError: if ``rows`` has no importer/exporter for the
                requested extension or output type (from ``getattr``).
        """
        convert_url = self.cleaned_data.get('convert_url')
        convert_file = self.cleaned_data.get('convert_file')
        type_to = self.cleaned_data.get('type_to')

        def export(data):
            # Shared export step for both input kinds.
            result = StringIO.StringIO()
            getattr(rows, 'export_to_%s' % type_to)(data, result)
            return result

        if convert_file:
            path = os.path.join(settings.MEDIA_ROOT, default_storage.save(convert_file.name, ContentFile(convert_file.read())))
            convert_type = convert_file.name.split('.')[-1]
            try:
                data = getattr(rows, 'import_from_%s' % convert_type)(path)
                return export(data)
            finally:
                # Always remove the stored upload, even when import or export
                # fails; previously a failure left the file behind.
                os.unlink(path)

        # NOTE(review): the URL comes from form input and is fetched as-is -
        # confirm that upstream validation restricts which hosts are reachable.
        source = BytesIO(requests.get(convert_url).content)
        data = rows.import_from_html(source, preserve_html=True)
        return export(data)
Exemplo n.º 12
0
    def test_nested_tables_outer(self):
        """Import the outermost table of the nested-table fixture.

        Cells that contain inner tables are expected to come back as a single
        string with the inner cells' text joined by spaces.
        """
        filename = "tests/data/nested-table.html"

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj)
        self.assertEqual(
            set(table.fields.keys()), set(["t00r0c0", "t00r0c1", "t00r0c2"])
        )
        self.assertEqual(len(table), 3)

        self.assertEqual(table[0].t00r0c0, "t0,0r1c0")
        self.assertEqual(table[0].t00r0c1, "t0,0r1c1")
        self.assertEqual(table[0].t00r0c2, "t0,0r1c2")

        # if there are nested tables, the inner ones will be represented as
        # strings (each <td>...</td> element will return only one string, even
        # if there is a <table> inside it)
        inner_table = (
            "t0,1r0c0 t0,1r0c1 t0,1r1c0 t0,1r1c1 t0,1r2c0 "
            "t0,1r2c1 t0,2r0c0 t0,2r0c1 t0,2r1c0 t0,2r1c1 "
            "t0,1r3c1 t0,1r4c0 t0,1r4c1 t0,1r5c0 t0,1r5c1"
        )
        self.assertEqual(table[1].t00r0c0, "t0,0r2c0")
        self.assertEqual(table[1].t00r0c1, inner_table)
        self.assertEqual(table[1].t00r0c2, "t0,0r2c2")

        self.assertEqual(table[2].t00r0c0, "t0,0r3c0")
        self.assertEqual(table[2].t00r0c1, "t0,0r3c1")
        self.assertEqual(table[2].t00r0c2, "t0,0r3c2")
    def test_import_from_html_filename(self):
        """Importing by filename yields the reference table and its metadata."""
        imported = rows.import_from_html(self.filename, encoding=self.encoding)
        self.assert_table_equal(imported, utils.table)

        self.assertEqual(
            imported.meta,
            {
                'imported_from': 'html',
                'filename': self.filename,
                'encoding': self.encoding,
            },
        )
Exemplo n.º 14
0
    def test_import_from_html_filename(self):
        """Import ``self.filename`` and compare both the data and ``meta``."""
        source, encoding = self.filename, self.encoding
        table = rows.import_from_html(source, encoding=encoding)
        self.assert_table_equal(table, utils.table)

        expected_meta = dict(
            imported_from='html', filename=source, encoding=encoding)
        self.assertEqual(table.meta, expected_meta)
Exemplo n.º 15
0
    def test_export_to_html_filename(self):
        """Export the reference table to a file name and import it back."""
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False)
        # Only the path is needed. Closing the handle avoids leaking it and
        # avoids reopening a still-open temporary file by name, which is not
        # possible on every platform.
        temp.close()
        self.files_to_delete.append(temp.name)
        rows.export_to_html(utils.table, temp.name)

        table = rows.import_from_html(temp.name)
        self.assert_table_equal(table, utils.table)
Exemplo n.º 16
0
    def test_import_from_html_fobj(self):
        """Importing from an open file object yields the reference table."""
        # TODO: may test with codecs.open passing an encoding
        with open(self.filename) as source:
            imported = rows.import_from_html(source, encoding=self.encoding)
        self.assert_table_equal(imported, utils.table)

        self.assertEqual(
            imported.meta,
            dict(imported_from='html', filename=self.filename),
        )
Exemplo n.º 17
0
    def test_export_to_html_filename(self):
        """Round-trip the reference table through an HTML file given by name."""
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False)
        # Release the handle immediately: the test only uses the path, and an
        # open temporary file cannot be reopened by name on every platform.
        temp.close()
        self.files_to_delete.append(temp.name)
        rows.export_to_html(utils.table, temp.name)

        table = rows.import_from_html(temp.name)
        self.assert_table_equal(table, utils.table)
Exemplo n.º 18
0
    def test_table_index(self):
        """Select tables from the simple-table fixture by ``index``.

        The default import returns the table whose fields start with ``t0``;
        ``index=1`` returns the one whose fields start with ``t1``.
        """
        filename = "tests/data/simple-table.html"

        # One handle serves both imports and is closed when the block ends.
        with open(filename, mode="rb") as fobj:
            table_1 = rows.import_from_html(fobj)
            fobj.seek(0)
            table_2 = rows.import_from_html(fobj, index=1)

        self.assertEqual(set(table_1.fields.keys()), set(["t0r0c0", "t0r0c1"]))
        self.assertEqual(len(table_1), 1)
        self.assertEqual(table_1[0].t0r0c0, "t0r1c0")
        self.assertEqual(table_1[0].t0r0c1, "t0r1c1")

        self.assertEqual(set(table_2.fields.keys()), set(["t1r0c0", "t1r0c1"]))
        self.assertEqual(len(table_2), 2)
        self.assertEqual(table_2[0].t1r0c0, "t1r1c0")
        self.assertEqual(table_2[0].t1r0c1, "t1r1c1")
        self.assertEqual(table_2[1].t1r0c0, "t1r2c0")
        self.assertEqual(table_2[1].t1r0c1, "t1r2c1")
Exemplo n.º 19
0
    def test_table_index(self):
        """Select tables from the simple-table fixture by ``index``.

        The default import returns the table whose fields start with ``t0``;
        ``index=1`` returns the one whose fields start with ``t1``.
        """
        filename = 'tests/data/simple-table.html'

        # One handle serves both imports and is closed when the block ends.
        with open(filename, mode='rb') as fobj:
            table_1 = rows.import_from_html(fobj)
            fobj.seek(0)
            table_2 = rows.import_from_html(fobj, index=1)

        self.assertEqual(set(table_1.fields.keys()), set(['t0r0c0', 't0r0c1']))
        self.assertEqual(len(table_1), 1)
        self.assertEqual(table_1[0].t0r0c0, 't0r1c0')
        self.assertEqual(table_1[0].t0r0c1, 't0r1c1')

        self.assertEqual(set(table_2.fields.keys()), set(['t1r0c0', 't1r0c1']))
        self.assertEqual(len(table_2), 2)
        self.assertEqual(table_2[0].t1r0c0, 't1r1c0')
        self.assertEqual(table_2[0].t1r0c1, 't1r1c1')
        self.assertEqual(table_2[1].t1r0c0, 't1r2c0')
        self.assertEqual(table_2[1].t1r0c1, 't1r2c1')
Exemplo n.º 20
0
    def test_table_index(self):
        """Select tables from the simple-table fixture by ``index``.

        The default import returns the table whose fields start with ``t0``;
        ``index=1`` returns the one whose fields start with ``t1``.
        """
        filename = 'tests/data/simple-table.html'

        # One handle serves both imports and is closed when the block ends.
        with open(filename) as fobj:
            table_1 = rows.import_from_html(fobj)
            fobj.seek(0)
            table_2 = rows.import_from_html(fobj, index=1)

        self.assertEqual(set(table_1.fields.keys()), set(['t0r0c0', 't0r0c1']))
        self.assertEqual(len(table_1), 1)
        self.assertEqual(table_1[0].t0r0c0, 't0r1c0')
        self.assertEqual(table_1[0].t0r0c1, 't0r1c1')

        self.assertEqual(set(table_2.fields.keys()), set(['t1r0c0', 't1r0c1']))
        self.assertEqual(len(table_2), 2)
        self.assertEqual(table_2[0].t1r0c0, 't1r1c0')
        self.assertEqual(table_2[0].t1r0c1, 't1r1c1')
        self.assertEqual(table_2[1].t1r0c0, 't1r2c0')
        self.assertEqual(table_2[1].t1r0c1, 't1r2c1')
Exemplo n.º 21
0
    def test_export_to_html_fobj(self):
        """Export the reference table to an open file object and import it back."""
        # TODO: may test with codecs.open passing an encoding
        # TODO: may test file contents
        temp = tempfile.NamedTemporaryFile(delete=False, mode="wb")
        self.files_to_delete.append(temp.name)
        try:
            rows.export_to_html(utils.table, temp.file)
        finally:
            # Closing flushes any buffered output and releases the handle
            # before the file is reopened by name below; close() is harmless
            # if the exporter already closed it.
            temp.close()

        table = rows.import_from_html(temp.name)
        self.assert_table_equal(table, utils.table)
Exemplo n.º 22
0
    def test_nested_tables_second_inner(self):
        """Import the table at ``index=2`` of the nested-table fixture."""
        filename = "tests/data/nested-table.html"

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj, index=2)
        self.assertEqual(set(table.fields.keys()), set(["t02r0c0", "t02r0c1"]))
        self.assertEqual(len(table), 1)

        self.assertEqual(table[0].t02r0c0, "t0,2r1c0")
        self.assertEqual(table[0].t02r0c1, "t0,2r1c1")
Exemplo n.º 23
0
    def test_import_from_html_filename(self):
        """A filename import matches ``utils.table`` and records its origin."""
        imported = rows.import_from_html(self.filename, encoding=self.encoding)
        self.assert_table_equal(imported, utils.table)

        meta_items = [
            ("imported_from", "html"),
            ("filename", self.filename),
            ("encoding", self.encoding),
        ]
        self.assertEqual(imported.meta, dict(meta_items))
Exemplo n.º 24
0
 def extract(self):
     """Yield every row of ``self.filename`` as the mapping from ``_asdict()``.

     The file is read as ISO-8859-1 HTML; only ``<tr>`` elements without a
     ``bgcolor`` attribute are used as rows, ``self.fields`` defines the
     columns, and the first row is not skipped as a header.
     """
     import_options = dict(
         encoding="iso-8859-1",
         row_tag="//tr[not(@bgcolor)]",
         fields=self.fields,
         skip_header=False,
     )
     for record in rows.import_from_html(self.filename, **import_options):
         yield record._asdict()
    def test_import_from_html_fobj(self):
        """Import from a binary file object and verify data and metadata."""
        # TODO: may test with codecs.open passing an encoding
        with open(self.filename, mode='rb') as source:
            imported = rows.import_from_html(source, encoding=self.encoding)
        self.assert_table_equal(imported, utils.table)

        self.assertEqual(
            imported.meta,
            dict(imported_from='html',
                 filename=self.filename,
                 encoding=self.encoding),
        )
Exemplo n.º 26
0
    def test_nested_tables_second_inner(self):
        """Import the table at ``index=2`` of the nested-table fixture."""
        filename = 'tests/data/nested-table.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode='rb') as fobj:
            table = rows.import_from_html(fobj, index=2)
        self.assertEqual(set(table.fields.keys()), set(['t02r0c0', 't02r0c1']))
        self.assertEqual(len(table), 1)

        self.assertEqual(table[0].t02r0c0, 't0,2r1c0')
        self.assertEqual(table[0].t02r0c1, 't0,2r1c1')
Exemplo n.º 27
0
    def test_preserve_html_and_not_skip_header(self, mocked_create_table):
        """Inspect the rows handed to ``create_table`` when HTML is preserved.

        Two imports of the same fixture are checked through the mock's
        recorded calls: one where field names are detected from the first
        row, and one where ``fields`` is supplied with ``skip_header=False``.
        """
        filename = "tests/data/table-with-sections.html"

        # Case 1: field names come from the first row, so that row must be
        # free of markup while every later row keeps it.
        table_1 = rows.import_from_html(filename, index=1, preserve_html=True)
        recorded_call = mocked_create_table.call_args_list.pop()
        passed_rows = list(recorded_call[0][0])
        passed_kwargs = recorded_call[1]

        self.assertEqual(passed_kwargs.get("fields", None), None)
        self.assertEqual(len(passed_rows), 6)
        header_cell = passed_rows[0][1]
        self.assertNotIn("<", header_cell)
        self.assertNotIn(">", header_cell)
        for data_row in passed_rows[1:]:
            # The second column carries HTML.
            self.assertIn("<", data_row[1])
            self.assertIn(">", data_row[1])

        # Case 2: fields are given explicitly and the header is not skipped,
        # so all six rows (the first one included) are expected to keep
        # their markup.
        fields = OrderedDict()
        for field_name in ("first", "second", "third", "fourth"):
            fields[field_name] = rows.fields.TextField
        table_2 = rows.import_from_html(
            filename, index=1, fields=fields, preserve_html=True, skip_header=False
        )
        recorded_call = mocked_create_table.call_args_list.pop()
        passed_rows = list(recorded_call[0][0])
        passed_kwargs = recorded_call[1]

        self.assertEqual(passed_kwargs.get("fields", None), fields)
        self.assertEqual(len(passed_rows), 6)
        for data_row in passed_rows:
            # The second column carries HTML and must not be stripped.
            self.assertIn("<", data_row[1])
            self.assertIn(">", data_row[1])
Exemplo n.º 28
0
    def test_table_thead_tbody(self):
        """Import the thead/tbody fixture and check its fields and two rows."""
        filename = "tests/data/table-thead-tbody.html"

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj)
        self.assertEqual(set(table.fields.keys()), set(["t1", "t2"]))
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].t1, "456")
        self.assertEqual(table[0].t2, "123")
        self.assertEqual(table[1].t1, "qqq")
        self.assertEqual(table[1].t2, "aaa")
Exemplo n.º 29
0
    def test_issue_168(self):
        """Round-trip a table with a JSON column through an HTML file."""
        temp = tempfile.NamedTemporaryFile(delete=False)
        # Only the generated name is used; release the handle right away.
        temp.close()
        filename = "{}.{}".format(temp.name, self.file_extension)
        # ``delete=False`` leaves the placeholder on disk, so register it for
        # cleanup together with the file the export creates.
        self.files_to_delete.append(temp.name)
        self.files_to_delete.append(filename)

        table = rows.Table(fields=OrderedDict([("jsoncolumn", rows.fields.JSONField)]))
        table.append({"jsoncolumn": '{"python": 42}'})
        rows.export_to_html(table, filename)

        table2 = rows.import_from_html(filename)
        self.assert_table_equal(table, table2)
Exemplo n.º 30
0
    def test_table_thead_tbody(self):
        """Import the thead/tbody fixture and check its fields and two rows."""
        filename = 'tests/data/table-thead-tbody.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode='rb') as fobj:
            table = rows.import_from_html(fobj)
        self.assertEqual(set(table.fields.keys()), set(['t1', 't2']))
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].t1, '456')
        self.assertEqual(table[0].t2, '123')
        self.assertEqual(table[1].t1, 'qqq')
        self.assertEqual(table[1].t2, 'aaa')
Exemplo n.º 31
0
    def test_nested_tables_second_inner(self):
        """Import the table at ``index=2`` of the nested-table fixture."""
        filename = 'tests/data/nested-table.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename) as fobj:
            table = rows.import_from_html(fobj, index=2)
        self.assertEqual(set(table.fields.keys()),
                         set(['t02r0c0', 't02r0c1']))
        self.assertEqual(len(table), 1)

        self.assertEqual(table[0].t02r0c0, 't0,2r1c0')
        self.assertEqual(table[0].t02r0c1, 't0,2r1c1')
Exemplo n.º 32
0
    def test_table_thead_tbody(self):
        """Import the thead/tbody fixture and check its fields and two rows."""
        filename = 'tests/data/table-thead-tbody.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename) as fobj:
            table = rows.import_from_html(fobj)
        self.assertEqual(set(table.fields.keys()), set(['t1', 't2']))
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].t1, '456')
        self.assertEqual(table[0].t2, '123')
        self.assertEqual(table[1].t1, 'qqq')
        self.assertEqual(table[1].t2, 'aaa')
Exemplo n.º 33
0
    def test_import_from_html_uses_create_table(self, mocked_create_table):
        """``import_from_html`` must delegate to ``create_table`` exactly once.

        The mock's return value has to be returned unchanged, and the keyword
        arguments passed in must reach ``create_table`` together with a
        ``meta`` entry describing the source.
        """
        sentinel = 42
        mocked_create_table.return_value = sentinel
        options = dict(encoding='iso-8859-15', some_key=123, other=456)
        outcome = rows.import_from_html(self.filename, **options)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(outcome, sentinel)

        received_kwargs = mocked_create_table.call_args[1]
        expected_kwargs = dict(
            options,
            meta={'imported_from': 'html', 'filename': self.filename},
        )
        self.assertEqual(received_kwargs, expected_kwargs)
Exemplo n.º 34
0
Arquivo: records.py Projeto: zntt/stf
    def parse_process(self, response):
        """Attach the process movements found in *response* to its meta row.

        Reads the ``row`` dict from ``response.request.meta``, imports the
        table at index 1 of the response body, joins the ``andamento`` value
        of every table row with '|' and stores the result under
        ``row['andamentos']``. The updated dict is returned.
        """
        row = response.request.meta['row']

        body = response.body_as_unicode()
        table = rows.import_from_html(io.BytesIO(body.encode('utf-8')),
                                      encoding='utf-8',
                                      index=1)
        # Use a distinct loop name: reusing ``row`` here shadows the meta dict
        # and, under Python 2 comprehension scoping, overwrites it.
        andamentos = [table_row.andamento for table_row in table]
        row['andamentos'] = '|'.join(andamentos)

        return row
Exemplo n.º 35
0
    def test_import_from_html_fobj(self):
        """A binary file object import matches ``utils.table`` and its meta."""
        # TODO: may test with codecs.open passing an encoding
        with open(self.filename, mode="rb") as source:
            imported = rows.import_from_html(source, encoding=self.encoding)
        self.assert_table_equal(imported, utils.table)

        meta_items = [
            ("imported_from", "html"),
            ("filename", self.filename),
            ("encoding", self.encoding),
        ]
        self.assertEqual(imported.meta, dict(meta_items))
Exemplo n.º 36
0
 def parse_movements(self, response):
     """Yield one dict per row of the table at index 1 of the response body.

     The ``data`` column is forced to ``PtBrDateField``. Each yielded dict
     also carries ``numero_processo`` and ``classe_processo`` copied from
     the ``process`` entry of ``response.request.meta``.
     """
     process = response.request.meta['process']
     payload = io.BytesIO(response.body_as_unicode().encode('utf-8'))
     movements = rows.import_from_html(
         payload,
         encoding='utf-8',
         index=1,
         force_types={'data': PtBrDateField},
     )
     for movement in movements:
         record = dict(movement._asdict())
         record['numero_processo'] = process['numero_processo']
         record['classe_processo'] = process['classe_processo']
         yield record
Exemplo n.º 37
0
    def test_ignore_colspan(self):
        """Import the colspan fixture with and without ``ignore_colspan``.

        With ``ignore_colspan=True`` the table has fields ``field1`` and
        ``field2`` and two rows. With ``ignore_colspan=False`` the fields are
        ``huge_title`` and ``field_1`` and there are three rows, the first of
        which holds the strings "field1" and "field2".
        """
        filename = "tests/data/colspan-table.html"

        # Context managers guarantee the fixture is closed, even on failure.
        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj, ignore_colspan=True)
        self.assertEqual(set(table.fields.keys()), set(["field1", "field2"]))
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].field1, "row1field1")
        self.assertEqual(table[0].field2, "row1field2")
        self.assertEqual(table[1].field1, "row2field1")
        self.assertEqual(table[1].field2, "row2field2")

        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj, ignore_colspan=False)
        # ``assertEquals`` is a deprecated alias; use ``assertEqual``.
        self.assertEqual(list(table.fields.keys()), ["huge_title", "field_1"])
        self.assertEqual(len(table), 3)
        expected_data = [
            ["field1", "field2"],
            ["row1field1", "row1field2"],
            ["row2field1", "row2field2"],
        ]
        for row_data, table_row in zip(expected_data, table):
            self.assertEqual(row_data, [table_row.huge_title, table_row.field_1])
    def test_import_from_html_uses_create_table(self, mocked_create_table):
        """``import_from_html`` must call ``create_table`` once and return its result.

        Extra keyword arguments have to be forwarded, and ``meta`` must
        describe the source file and the encoding used.
        """
        sentinel = 42
        mocked_create_table.return_value = sentinel
        extra_options = dict(some_key=123, other=456)
        outcome = rows.import_from_html(
            self.filename, encoding='iso-8859-1', **extra_options)
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(outcome, sentinel)

        received_kwargs = mocked_create_table.call_args[1]
        expected_meta = dict(
            imported_from='html',
            filename=self.filename,
            encoding='iso-8859-1',
        )
        self.assertEqual(
            received_kwargs, dict(extra_options, meta=expected_meta))
Exemplo n.º 39
0
    def test_import_from_html_uses_create_table(self, mocked_create_table):
        """Check delegation to ``create_table``: one call, result passed through.

        The forwarded keyword arguments are compared with the ones given
        here plus the expected ``meta`` mapping.
        """
        mocked_create_table.return_value = 42
        forwarded = {"some_key": 123, "other": 456}
        outcome = rows.import_from_html(
            self.filename, encoding="iso-8859-1", **forwarded
        )
        self.assertTrue(mocked_create_table.called)
        self.assertEqual(mocked_create_table.call_count, 1)
        self.assertEqual(outcome, 42)

        last_call = mocked_create_table.call_args
        expected_kwargs = dict(forwarded)
        expected_kwargs["meta"] = dict(
            imported_from="html", filename=self.filename, encoding="iso-8859-1"
        )
        self.assertEqual(last_call[1], expected_kwargs)
Exemplo n.º 40
0
def search_router_database(query):
    """Search the router database for *query* and return the transformed table.

    Posts the search form to URL_ROUTER_SEARCH, imports the HTML response
    with ``properties=True`` and builds the output schema: an integer ``id``
    followed by every name in FIELD_NAMES that the imported table actually
    has. The table is then passed through ``rows.transform`` with
    ``transform_row``.
    """
    form_data = {'action': 'routerList', 'criteria': query, 'site': 'drupal'}
    response = requests.post(URL_ROUTER_SEARCH, data=form_data)
    table = rows.import_from_html(
        BytesIO(response.content),
        encoding=response.encoding,
        properties=True,
    )

    fields = OrderedDict([('id', rows.fields.IntegerField)])
    fields.update(
        (name, table.fields[name])
        for name in FIELD_NAMES
        if name in table.fields
    )

    return rows.transform(fields, transform_row, table)
Exemplo n.º 41
0
    def test_preserve_html(self):
        """With ``preserve_html=True`` a nested table is returned as markup.

        The second column of the second row is normalised with
        ``cleanup_lines`` and compared with the expected tags line by line.
        """
        filename = 'tests/data/nested-table.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename) as fobj:
            table = rows.import_from_html(fobj, preserve_html=True)
        expected_data = [
                '<table>', '<tr>', '<td> t0,1r0c0 </td>',
                '<td> t0,1r0c1 </td>', '</tr>', '<tr>', '<td> t0,1r1c0 </td>',
                '<td> t0,1r1c1 </td>', '</tr>', '<tr>', '<td> t0,1r2c0 </td>',
                '<td> t0,1r2c1 </td>', '</tr>', '<tr>', '<td>', '<table>',
                '<tr>', '<td> t0,2r0c0 </td>', '<td> t0,2r0c1 </td>', '</tr>',
                '<tr>', '<td> t0,2r1c0 </td>', '<td> t0,2r1c1 </td>', '</tr>',
                '</table>', '</td>', '<td> t0,1r3c1 </td>', '</tr>', '<tr>',
                '<td> t0,1r4c0 </td>', '<td> t0,1r4c1 </td>', '</tr>', '<tr>',
                '<td> t0,1r5c0 </td>', '<td> t0,1r5c1 </td>', '</tr>',
                '</table>']
        self.assertEqual(cleanup_lines(table[1].t00r0c1), expected_data)
Exemplo n.º 42
0
    def test_preserve_html(self):
        """With ``preserve_html=True`` a nested table is returned as markup.

        The second column of the second row is normalised with
        ``cleanup_lines`` and compared with the expected tags line by line.
        """
        filename = "tests/data/nested-table.html"

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj, preserve_html=True)
        # TODO: test without passing encoding
        expected_data = [
            "<table>",
            "<tr>",
            "<td> t0,1r0c0 </td>",
            "<td> t0,1r0c1 </td>",
            "</tr>",
            "<tr>",
            "<td> t0,1r1c0 </td>",
            "<td> t0,1r1c1 </td>",
            "</tr>",
            "<tr>",
            "<td> t0,1r2c0 </td>",
            "<td> t0,1r2c1 </td>",
            "</tr>",
            "<tr>",
            "<td>",
            "<table>",
            "<tr>",
            "<td> t0,2r0c0 </td>",
            "<td> t0,2r0c1 </td>",
            "</tr>",
            "<tr>",
            "<td> t0,2r1c0 </td>",
            "<td> t0,2r1c1 </td>",
            "</tr>",
            "</table>",
            "</td>",
            "<td> t0,1r3c1 </td>",
            "</tr>",
            "<tr>",
            "<td> t0,1r4c0 </td>",
            "<td> t0,1r4c1 </td>",
            "</tr>",
            "<tr>",
            "<td> t0,1r5c0 </td>",
            "<td> t0,1r5c1 </td>",
            "</tr>",
            "</table>",
        ]
        self.assertEqual(cleanup_lines(table[1].t00r0c1), expected_data)
Exemplo n.º 43
0
    def test_extract_properties(self):
        """Import the properties fixture with ``properties=True``.

        Expects an extra ``properties`` JSON field holding, for each row, the
        ``class`` and ``data-test`` attributes listed below.
        """
        filename = "tests/data/properties-table.html"

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj, properties=True)
        self.assertEqual(table.field_names, ["field1", "field2", "properties"])
        self.assertEqual(
            table.field_types,
            [rows.fields.TextField, rows.fields.TextField, rows.fields.JSONField],
        )
        properties_1 = {"class": "some-class another-class", "data-test": "value"}
        properties_2 = {"class": "css-class", "data-test": "value2"}
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].field1, "row1field1")
        self.assertEqual(table[0].field2, "row1field2")
        self.assertEqual(table[0].properties, properties_1)
        self.assertEqual(table[1].field1, "row2field1")
        self.assertEqual(table[1].field2, "row2field2")
        self.assertEqual(table[1].properties, properties_2)
Exemplo n.º 44
0
def router_images(router_id):
    """Return a table describing the files listed on a router's detail page.

    The table at index 1 of the detail HTML is imported with markup
    preserved. Each row is then converted to plain values: the ``filename``
    cell is parsed with ``tag_to_dict`` to obtain the link text and ``href``,
    the ``href`` is quoted and joined onto URL_ROUTER_SEARCH, and the
    remaining cells are reduced with ``extract_text``.
    """
    html = _router_detail(router_id)
    table = rows.import_from_html(BytesIO(html), index=1, preserve_html=True)

    fields = OrderedDict()
    fields['date'] = rows.fields.DateField
    for text_column in ('filename', 'url', 'size', 'description'):
        fields[text_column] = rows.fields.TextField

    def transform(row, table):
        file_data = tag_to_dict(row.filename)
        quoted_href = url_quote(file_data['href'])
        absolute_url = url_join(URL_ROUTER_SEARCH, quoted_href)
        converted = {}
        converted['date'] = extract_text(row.date)
        converted['description'] = extract_text(row.description)
        converted['filename'] = file_data['text']
        converted['size'] = extract_text(row.size)
        converted['url'] = absolute_url
        return converted

    return rows.transform(fields, transform, table)
Exemplo n.º 45
0
    def test_extract_properties(self):
        """Import the properties fixture with ``properties=True``.

        Expects an extra ``properties`` JSON field holding, for each row, the
        ``class`` and ``data-test`` attributes listed below.
        """
        filename = 'tests/data/properties-table.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode='rb') as fobj:
            table = rows.import_from_html(fobj, properties=True)
        self.assertEqual(table.field_names,
                         ['field1', 'field2', 'properties'])
        self.assertEqual(table.field_types,
                         [rows.fields.TextField,
                          rows.fields.TextField,
                          rows.fields.JSONField])
        properties_1 = {'class': 'some-class another-class',
                        'data-test': 'value', }
        properties_2 = {'class': 'css-class', 'data-test': 'value2', }
        self.assertEqual(len(table), 2)
        self.assertEqual(table[0].field1, 'row1field1')
        self.assertEqual(table[0].field2, 'row1field2')
        self.assertEqual(table[0].properties, properties_1)
        self.assertEqual(table[1].field1, 'row2field1')
        self.assertEqual(table[1].field2, 'row2field2')
        self.assertEqual(table[1].properties, properties_2)
Exemplo n.º 46
0
    def test_nested_tables_first_inner(self):
        """Import the table at ``index=1`` of the nested-table fixture.

        The cell that contains a further nested table is expected to come
        back as one string with the inner cells' text joined by spaces.
        """
        filename = "tests/data/nested-table.html"

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename, mode="rb") as fobj:
            table = rows.import_from_html(fobj, index=1)
        self.assertEqual(set(table.fields.keys()), set(["t01r0c0", "t01r0c1"]))
        self.assertEqual(len(table), 5)

        self.assertEqual(table[0].t01r0c0, "t0,1r1c0")
        self.assertEqual(table[0].t01r0c1, "t0,1r1c1")

        self.assertEqual(table[1].t01r0c0, "t0,1r2c0")
        self.assertEqual(table[1].t01r0c1, "t0,1r2c1")

        inner_table = "t0,2r0c0 t0,2r0c1 t0,2r1c0 t0,2r1c1"
        self.assertEqual(table[2].t01r0c0, inner_table)
        self.assertEqual(table[2].t01r0c1, "t0,1r3c1")

        self.assertEqual(table[3].t01r0c0, "t0,1r4c0")
        self.assertEqual(table[3].t01r0c1, "t0,1r4c1")

        self.assertEqual(table[4].t01r0c0, "t0,1r5c0")
        self.assertEqual(table[4].t01r0c1, "t0,1r5c1")
Exemplo n.º 47
0
    def test_nested_tables_outer(self):
        """Import the outermost table of the nested-table fixture.

        The cell holding the inner tables is normalised with
        ``cleanup_lines`` and compared with the list of inner cell texts.
        """
        filename = 'tests/data/nested-table.html'

        # Close the fixture deterministically instead of leaking the handle.
        with open(filename) as fobj:
            table = rows.import_from_html(fobj)
        self.assertEqual(set(table.fields.keys()),
                         set(['t00r0c0', 't00r0c1', 't00r0c2']))
        self.assertEqual(len(table), 3)

        self.assertEqual(table[0].t00r0c0, 't0,0r1c0')
        self.assertEqual(table[0].t00r0c1, 't0,0r1c1')
        self.assertEqual(table[0].t00r0c2, 't0,0r1c2')

        inner_table = ('t0,1r0c0 t0,1r0c1 t0,1r1c0 t0,1r1c1 t0,1r2c0 '
                       't0,1r2c1 t0,2r0c0 t0,2r0c1 t0,2r1c0 t0,2r1c1 '
                       't0,1r3c1 t0,1r4c0 t0,1r4c1 t0,1r5c0 t0,1r5c1').split()
        self.assertEqual(table[1].t00r0c0, 't0,0r2c0')
        self.assertEqual(cleanup_lines(table[1].t00r0c1), inner_table)
        self.assertEqual(table[1].t00r0c2, 't0,0r2c2')

        self.assertEqual(table[2].t00r0c0, 't0,0r3c0')
        self.assertEqual(table[2].t00r0c1, 't0,0r3c1')
        self.assertEqual(table[2].t00r0c2, 't0,0r3c2')