示例#1
0
    def test_url(self):
        """A generated url wrapped in Q reports as a url and parses to a matching netloc."""
        expected_url = testdata.get_url()
        query = Q(expected_url)
        self.assertTrue(query.is_url())

        # the generated url is expected to end with the parsed network location
        parsed = query.url()
        self.assertTrue(expected_url.endswith(parsed.netloc))
示例#2
0
    def test_no_headers(self):
        """Cells of a table without header cells are keyed by their stringified index."""
        values = ["Column 1", "Column 2", "Column 3", "Column 4"]

        # one <td> line per value, wrapped in a single-row table
        markup = "\n".join(
            ["<table>", "    <tr>"]
            + ["        <td>{}</td>".format(value) for value in values]
            + ["    </tr>", "</table>"]
        )
        parser = Table(testdata.get_url(), markup)
        parser.parse()

        expected = dict(
            (str(index), {"headers": [], "value": value})
            for index, value in enumerate(values)
        )
        self.assertEqual(expected, parser.tables[0]["rows"][0])
示例#3
0
    def test_tables6(self):
        """Check the shape and header labels of the first table in the "tables6" fixture.

        The first parsed table must contain 5 rows of 4 entries each, and every
        entry in each of the first four rows must list both header labels
        expected for that row.
        """
        html = self.get_html("tables6")
        t = Table(testdata.get_url(), html)
        t.parse()

        rows = t.fields["tables"][0]["rows"]
        self.assertEqual(5, len(rows))

        for row in rows:
            self.assertEqual(4, len(row))

        # expected header labels, indexed by row position
        # NOTE(review): row 2 expects "2 header" rather than "3 header" --
        # presumably that header spans two columns in the fixture; confirm
        expected_headers = [
            ("1 header", "1 subheader"),
            ("2 header", "2 subheader"),
            ("2 header", "3 subheader"),
            ("4 header", "4 subheader"),
        ]
        for index, labels in enumerate(expected_headers):
            for cell in rows[index].values():
                for label in labels:
                    # assertIn reports the missing label and the actual headers
                    self.assertIn(label, cell["headers"])
示例#4
0
 def test_get_url(self):
     """testdata.get_url() yields a non-empty string matching an http(s) url pattern."""
     generated = testdata.get_url()
     self.assertNotEqual("", generated)

     # the regex assertion goes by a different name depending on is_py2
     assert_matches = self.assertRegexpMatches if is_py2 else self.assertRegex
     assert_matches(generated, r'https?\://\S*')
示例#5
0
 def test_digital(self):
     """An Item whose body sets "digital" to True has " (digital)" in its title."""
     body = {
         "url": testdata.get_url(),
         "title": testdata.get_words(),
         "digital": True,
         "price": 1.0,
     }
     nit = Item(price=100, body=body, uuid="foo")
     # assertIn shows the actual title on failure, assertTrue(x in y) would not
     self.assertIn(" (digital)", nit.title)
示例#6
0
 def test_new_old_price(self):
     """The detail html of an Item shows its price and the earlier, higher price."""
     uuid = testdata.get_hash()
     body = {
         "url": testdata.get_url(),
         "title": testdata.get_words(),
     }
     # called for its effect only, the return value is not needed
     # NOTE(review): presumably this stores the earlier price (10) under the
     # same uuid so the new Item can look it up -- confirm
     WatchlistItem.create(price=10, body=dict(body), uuid=uuid)

     it = Item(price=1, body=dict(body), uuid=uuid)
     s = it.html_detail()
     # assertIn prints the rendered html on failure, assertTrue(x in y) would not
     self.assertIn("<b>$1.00</b>", s)
     self.assertIn("was <b>$10.00</b>", s)
示例#7
0
 def test_image_col(self):
     """A cell that contains only an <img> yields the image src as its value."""
     markup = "\n".join([
         "<table>",
         "    <tr>",
         "        <td><img src=\"https://foo.com/image.jpg\" /></td>",
         "        <td>Column 2</td>",
         "    </tr>",
         "</table>",
     ])
     parser = Table(testdata.get_url(), markup)
     parser.parse()

     first_row = parser.tables[0]["rows"][0]
     self.assertEqual("https://foo.com/image.jpg", first_row["0"]["value"])
示例#8
0
    def test_dimensions(self):
        """find_dimensions() returns the expected (cols, rows) pair for fixture tables."""
        parser = Table(testdata.get_url(), self.get_html("tables4"))
        table_tags = parser.soup.find_all("table")

        # (index into the "tables4" <table> tags, expected (cols, rows))
        cases = [
            (2, (4, 3)),
            (3, (3, 2)),
            (1, (2, 1)),
            (5, (6, 2)),
        ]
        for index, expected in cases:
            width, height = parser.find_dimensions(table_tags[index])
            self.assertEqual(expected, (width, height))

        # the second table of the "tables3" fixture is checked separately
        parser = Table(testdata.get_url(), self.get_html("tables3"))
        second_table = parser.soup.find_all("table")[1]
        width, height = parser.find_dimensions(second_table)
        self.assertEqual((3, 55), (width, height))
示例#9
0
    def test_row_th(self):
        """Parse a table that mixes td and th cells inside the same tr.

        The markup was one of the test tables in the docs: its header row
        contains a td and its content row contains a th, and the parser has to
        cope with both. The content row is expected to come back keyed by the
        column header text, with the first column keyed by "0".
        """
        markup = "\n".join([
            '<table>',
            '    <tr>',
            '        <td> </td>',
            '        <th scope="col">Batman</th>',
            '        <th scope="col">Robin</th>',
            '        <th scope="col">The Flash</th>',
            '        <th scope="col">Kid Flash</th>',
            '    </tr>',
            '    <tr>',
            '        <th scope="row">Skill</th>',
            '        <td>Smarts</td>',
            '        <td>Dex, acrobat</td>',
            '        <td>Super speed</td>',
            '        <td>Super speed</td>',
            '    </tr>',
            '</table>',
        ])
        parser = Table(testdata.get_url(), markup)
        parser.parse()

        # expected key -> cell value pairs for the single content row
        pairs = [
            ('0', "Skill"),
            ('Batman', "Smarts"),
            ('Robin', "Dex, acrobat"),
            ('The Flash', "Super speed"),
            ('Kid Flash', "Super speed"),
        ]
        expected = dict(
            (key, {"headers": [], "value": value}) for key, value in pairs
        )
        self.assertEqual(expected, parser.tables[0]["rows"][0])
示例#10
0
def get_item(item=None, **kwargs):
    """Build an Item filled with generated test data.

    Any of "url", "digital", "image" and "title" missing from the body is
    filled in from testdata. "price" and "uuid" are taken out of the body and
    passed to Item directly; when absent they are generated as well.

    :param item: optional existing item; when truthy, a copy of
        item.newest.body is used as the base body, kwargs override it, and
        item.uuid is used unless a "uuid" is already present
    :param **kwargs: body values; a float "price" is stored in the body as-is
        and passed to Item multiplied by 100 as an int, any other "price" is
        passed to Item as-is and stored in the body multiplied by 0.01
    :returns: the new Item instance
    """
    if item:
        body = dict(item.newest.body)
        body.update(kwargs)
        body.setdefault("uuid", item.uuid)
        kwargs = body

    price = kwargs.pop("price", testdata.get_int(1000))
    uuid = kwargs.pop("uuid", testdata.get_hash())

    kwargs.setdefault("url", testdata.get_url())
    kwargs.setdefault("digital", testdata.get_bool())
    kwargs.setdefault("image", testdata.get_url())
    kwargs.setdefault("title", testdata.get_words())

    if isinstance(price, float):
        kwargs["price"] = price
        # round before converting: 0.29 * 100.0 is 28.999999999999996, which a
        # bare int() would truncate to 28 instead of 29
        price = int(round(price * 100.0))
    else:
        kwargs["price"] = float(price) * 0.01

    return Item(price=price, body=kwargs, uuid=uuid)
示例#11
0
 def test_header_error(self):
     """Each row of the first "tables7" table has 13 columns and a truthy "Icon"."""
     markup = self.get_html("tables7")
     parser = Table(testdata.get_url(), markup)
     parser.parse()

     first_table = parser.tables[0]
     self.assertTrue(first_table.caption)

     expected_columns = 13
     for row in first_table:
         self.assertEqual(expected_columns, len(list(row.columns())))
         self.assertTrue(row.get("Icon"))
示例#12
0
    def test_dl(self):
        """Definition lists are collected under fields["dls"], one entry per <dl>."""
        page_url = testdata.get_url()

        # two separate lists holding two term/definition pairs each
        two_lists = "\n".join([
            "<dl>",
            "  <dt>term1</dt>",
            "  <dd>definition 1</dd>",
            "  <dt>term2</dt>",
            "  <dd>definition 2</dd>",
            "</dl>",
            "<dl>",
            "  <dt>term3</dt>",
            "  <dd>definition 3</dd>",
            "  <dt>term4</dt>",
            "  <dd>definition 4</dd>",
            "</dl>",
        ])
        parser = Table(page_url, two_lists)
        parser.parse()
        found = parser.fields["dls"]
        self.assertEqual(2, len(found))
        self.assertEqual(2, len(found[0]))
        self.assertEqual(2, len(found[1]))

        # a single list in which two consecutive terms precede one definition
        shared_definition = "\n".join([
            "<dl>",
            "  <dt>term1</dt>",
            "  <dt>term2</dt>",
            "  <dd>definition 1</dd>",
            "  <dt>term3</dt>",
            "  <dd>definition 2</dd>",
            "</dl>",
        ])
        parser = Table(page_url, shared_definition)
        parser.parse()
        found = parser.fields["dls"]
        self.assertEqual(1, len(found))
        self.assertEqual(2, len(found[0]))
示例#13
0
 def test_get_url(self):
     """testdata.get_url() returns a non-empty string matching an http(s) url pattern."""
     s = testdata.get_url()
     self.assertNotEqual(u"", s)
     # assertRegexpMatches was removed in Python 3.12; prefer assertRegex and
     # only fall back to the old name where assertRegex does not exist
     assert_regex = getattr(self, "assertRegex", None) or self.assertRegexpMatches
     # raw string: "\:" and "\S" are invalid escapes in a plain string literal,
     # the pattern text itself is unchanged
     assert_regex(s, r'https?\://\S*')
示例#14
0
    def test_content(self):
        """find_table() returns the expected rows for three tables of "tables4"."""
        def cell(value):
            # every expected cell has an empty header list
            return {"headers": [], "value": value}

        markup = self.get_html("tables4")
        parser = Table(testdata.get_url(), markup)
        table_tags = parser.soup.find_all("table")

        # index of the <table> tag in the fixture -> rows expected from it
        expected_rows = {
            0: [
                {
                    'Last name': cell("Doe"),
                    'First name': cell("John"),
                },
                {
                    'Last name': cell("Doe"),
                    'First name': cell("Jane"),
                },
            ],
            1: [
                {
                    'Header content 1': cell("Body content 1"),
                    'Header content 2': cell("Body content 2"),
                },
                {
                    'Header content 1': cell("Footer content 1"),
                    'Header content 2': cell("Footer content 2"),
                },
            ],
            6: [
                {
                    'header 2': cell("Content 1.2"),
                    'header 1': cell("Content 1.1"),
                },
                {
                    'header 2': cell("Content 2.2"),
                    'header 1': cell("Content 2.1"),
                },
            ],
        }

        for index, rows in expected_rows.items():
            found = parser.find_table(table_tags[index])
            self.assertEqual(rows, found["rows"])