示例#1
0
 def test_no_attr(self):
     # The anchor below only carries an href, so a rule that targets
     # "src" has nothing to read; the test expects None in that case.
     markup = "<a href=\"http://www.example.com\">Test</a>"
     anchor = html.fragment_fromstring(markup)
     rule_spec = {"name": "url", "attr": "src", "type": "string"}
     rule = Rule.from_json(rule_spec)
     self.assertIsNone(rule.data(anchor))
示例#2
0
 def test_bad_float(self):
     # The element text holds no number, so a float rule reading it is
     # expected to produce None.
     rule_spec = {"name": "url", "attr": "text", "type": "float"}
     rule = Rule.from_json(rule_spec)
     markup = "<div>Nothing to see here</div>"
     div = html.fragment_fromstring(markup)
     self.assertIsNone(rule.data(div))
示例#3
0
 def test_type_string(self):
     # A "string" rule on href is expected to return the attribute value
     # unchanged, as a str.
     markup = "<a href=\"http://www.example.com\">Test</a>"
     anchor = html.fragment_fromstring(markup)
     rule = Rule.from_json(
         {"name": "url", "attr": "href", "type": "string"}
     )
     extracted = rule.data(anchor)
     self.assertIsInstance(extracted, str)
     self.assertEqual(extracted, "http://www.example.com")
示例#4
0
 def test_from_json(self):
     # Each well-formed spec should yield a Rule whose name and attr
     # mirror the values supplied in the dict.
     specs = [
         {"name": "title", "attr": "text", "type": "string"},
         {"name": "url", "attr": "href", "type": "string"},
         {"name": "img", "attr": "src", "type": "string"},
         {"name": "description", "attr": "text", "type": "string"},
     ]
     for spec in specs:
         rule = Rule.from_json(spec)
         self.assertIsInstance(rule, Rule)
         self.assertEqual(rule.name, spec["name"])
         self.assertEqual(rule.attr, spec["attr"])
示例#5
0
 def test_bad_from_json(self):
     # Rule.from_json is expected to raise ValueError (it does not return
     # None) when the spec is incomplete. Every case below is missing at
     # least two of the "name", "attr" and "type" keys.
     bad_rules = [
         {"name": "foo"},
         {"attr": "bar"},
         {"type": "float"},
         {},
     ]
     for rule_json in bad_rules:
         with self.assertRaises(ValueError):
             Rule.from_json(rule_json)
示例#6
0
 def test_type_float(self):
     # Each case is (markup, attribute to read, float expected back).
     # The "text" cases have the number embedded in surrounding words.
     cases = (
         ("<p data-num=\"3.14159\">Test</p>", "data-num", 3.14159),
         ("<p>26.2 miles</p>", "text", 26.2),
         ("<p>In the 98.325th percentile</p>", "text", 98.325),
     )
     for markup, attr, want in cases:
         rule = Rule.from_json(
             {"name": "url", "attr": attr, "type": "float"}
         )
         node = html.fragment_fromstring(markup)
         got = rule.data(node)
         self.assertIsInstance(got, float)
         self.assertEqual(got, want)
示例#7
0
 def test_type_string(self):
     # Reading href through a "string" rule should give back the URL
     # as a str, unchanged.
     source = "<a href=\"http://www.example.com\">Test</a>"
     link = html.fragment_fromstring(source)
     spec = {"name": "url", "attr": "href", "type": "string"}
     href_rule = Rule.from_json(spec)
     result = href_rule.data(link)
     self.assertIsInstance(result, str)
     self.assertEqual(result, "http://www.example.com")
示例#8
0
 def test_type_int(self):
     # Each case is (markup, attribute to read, int expected back).
     # The "text" cases have the number embedded in surrounding words.
     cases = (
         ("<p data-index=\"3\">Test</p>", "data-index", 3),
         ("<p>15 miles</p>", "text", 15),
         ("<p>The 18th of July</p>", "text", 18),
     )
     for markup, attr, want in cases:
         rule = Rule.from_json(
             {"name": "url", "attr": attr, "type": "int"}
         )
         node = html.fragment_fromstring(markup)
         got = rule.data(node)
         self.assertIsInstance(got, int)
         self.assertEqual(got, want)
示例#9
0
 def test_no_attr(self):
     # The rule asks for "src", which this anchor does not have;
     # the extracted value is expected to be None.
     source = "<a href=\"http://www.example.com\">Test</a>"
     link = html.fragment_fromstring(source)
     spec = {"name": "url", "attr": "src", "type": "string"}
     src_rule = Rule.from_json(spec)
     result = src_rule.data(link)
     self.assertIsNone(result)
示例#10
0
 def test_bad_int(self):
     # The element text holds no integer, so the rule is expected to
     # return None (not a sentinel such as -1).
     r = Rule.from_json({
         "name": "url",
         "attr": "text",
         "type": "int"
     })
     html_string = "<div>Nothing to see here</div>"
     ele = html.fragment_fromstring(html_string)
     val = r.data(ele)
     self.assertIsNone(val)
示例#11
0
 def test_bad_spec_name(self):
     # An "all" spec with a missing name, a non-string name, or an empty
     # name is expected to be rejected with ValueError.
     invalid = [
         {"type": "all"},
         {"type": "all", "name": 0},
         {"type": "all", "name": ""},
     ]
     for spec in invalid:
         with self.assertRaises(ValueError):
             AllElement("a", spec, [], [Rule("url", "href", "string")])
示例#12
0
 def test_bad_spec_values(self):
     # Each "range" spec below is malformed in the way its trailing
     # comment describes; building a RangeElement from it must raise
     # ValueError.
     invalid = [
         {"type": "range", "low": 0, "high": 3},  # no name
         {"type": "range", "name": 0, "low": 0, "high": 3},  # bad name type
         {"type": "range", "name": "", "low": 0, "high": 3},  # bad name value
         {"type": "range", "name": "foo", "high": 3},  # no low
         {"type": "range", "name": "foo", "low": "0", "high": 3},  # bad low value
         {"type": "range", "name": "foo", "low": 0},  # no high
         {"type": "range", "name": "foo", "low": 0, "high": "3"},  # bad high value
         {"type": "range", "name": "foo", "low": 3, "high": 0},  # low > high
     ]
     for spec in invalid:
         with self.assertRaises(ValueError):
             RangeElement("a", spec, [], [Rule("url", "href", "string")])
示例#13
0
 def test_good_spec_values(self):
     # Both an integer "high" and a "high" of None are expected to be
     # accepted by RangeElement.
     valid = [
         {"type": "range", "name": "Foo", "low": 0, "high": 3},
         {"type": "range", "name": "Foo", "low": 0, "high": None},
     ]
     for spec in valid:
         built = RangeElement(
             "a", spec, [], [Rule("url", "href", "string")]
         )
         self.assertIsInstance(built, RangeElement)
示例#14
0
 def test_type_float(self):
     # Table of (markup, attribute, expected float). The rule is rebuilt
     # per case because the attribute being read differs.
     table = (
         ("<p data-num=\"3.14159\">Test</p>", "data-num", 3.14159),
         ("<p>26.2 miles</p>", "text", 26.2),
         ("<p>In the 98.325th percentile</p>", "text", 98.325),
     )
     for source, attr_name, target in table:
         spec = {"name": "url", "attr": attr_name, "type": "float"}
         float_rule = Rule.from_json(spec)
         element = html.fragment_fromstring(source)
         result = float_rule.data(element)
         self.assertIsInstance(result, float)
         self.assertEqual(result, target)
示例#15
0
 def test_type_int(self):
     # Table of (markup, attribute, expected int). The rule is rebuilt
     # per case because the attribute being read differs.
     table = (
         ("<p data-index=\"3\">Test</p>", "data-index", 3),
         ("<p>15 miles</p>", "text", 15),
         ("<p>The 18th of July</p>", "text", 18),
     )
     for source, attr_name, target in table:
         spec = {"name": "url", "attr": attr_name, "type": "int"}
         int_rule = Rule.from_json(spec)
         element = html.fragment_fromstring(source)
         result = int_rule.data(element)
         self.assertIsInstance(result, int)
         self.assertEqual(result, target)
示例#16
0
 def test_from_json(self):
     # All specs share the "string" type, so only (name, attr) varies.
     # Each spec should produce a Rule exposing the same name and attr.
     name_attr_pairs = (
         ("title", "text"),
         ("url", "href"),
         ("img", "src"),
         ("description", "text"),
     )
     for name, attr in name_attr_pairs:
         spec = {"name": name, "attr": attr, "type": "string"}
         built = Rule.from_json(spec)
         self.assertIsInstance(built, Rule)
         self.assertEqual(built.name, spec["name"])
         self.assertEqual(built.attr, spec["attr"])
示例#17
0
 def test_good_spec_index(self):
     # A "single" spec with an integer index is expected to construct
     # without error.
     spec = {"type": "single", "index": 0}
     rules = [Rule("url", "href", "string")]
     element = SingleElement("a", spec, [], rules)
     self.assertIsInstance(element, SingleElement)
示例#18
0
 def test_bad_from_json(self):
     # Rule.from_json is expected to raise ValueError (it does not return
     # None) when the spec is incomplete. Every case below is missing at
     # least two of the "name", "attr" and "type" keys.
     bad_rules = [{"name": "foo"}, {"attr": "bar"}, {"type": "float"}, {}]
     for rule_json in bad_rules:
         with self.assertRaises(ValueError):
             Rule.from_json(rule_json)
示例#19
0
 def test_good_spec_name(self):
     # An "all" spec with a non-empty string name is expected to
     # construct without error.
     spec = {"type": "all", "name": "links"}
     rules = [Rule("url", "href", "string")]
     element = AllElement("a", spec, [], rules)
     self.assertIsInstance(element, AllElement)
示例#20
0
 def test_bad_spec_index(self):
     # A "single" spec with no index, or with a string index, is
     # expected to be rejected with ValueError.
     invalid = [
         {"type": "single"},
         {"type": "single", "index": "0"},
     ]
     for spec in invalid:
         with self.assertRaises(ValueError):
             SingleElement("a", spec, [], [Rule("url", "href", "string")])