Пример #1
0
 def test_empty_from_json(self):
     """
     Element JSON with neither rules nor children must be rejected:
     ElementFactory.from_json is expected to raise ValueError.
     """
     spec = {"type": "single", "index": 0}
     payload = {
         "selector": "div",
         "children": [],
         "rules": [],
         "spec": spec,
     }
     with self.assertRaises(ValueError):
         ElementFactory.from_json(payload)
Пример #2
0
    def test_all_child_single_optional_fail(self):
        """
        An optional Element makes its own rules and those of all of its
        descendant Elements optional.

        With an optional SingleElement nested in ALL_ELEMENT, compare()
        must still return False for an item carrying only title/url and
        for an empty output.
        """
        parent_json = copy.deepcopy(ALL_ELEMENT)
        child_json = copy.deepcopy(SINGLE_ELEMENT)
        child_json["optional"] = True
        parent_json["children"].append(child_json)
        flat = flatten_element(ElementFactory.from_json(parent_json))

        title_and_url_only = {
            "items": [{
                "title": "Foundation",
                "url": "http://www.isaacasimov.com/foundation",
            }]
        }
        for candidate in (title_and_url_only, {}):
            self.assertFalse(compare(candidate, flat))
Пример #3
0
    def test_range_element(self):
        """
        An output whose items each carry a "count" produces no differences
        against the flattened RANGE_ELEMENT.
        """
        element = ElementFactory.from_json(copy.deepcopy(RANGE_ELEMENT))
        flat = flatten_element(element)

        matching = {"items": [{"count": value} for value in (7, 12)]}
        self.assertIsNone(differences(matching, flat))
Пример #4
0
    def test_all_optional(self):
        """
        Flattening an optional AllElement marks its "items" dict as
        optional, along with the rules contained in that dict.
        """
        source = copy.deepcopy(ALL_ELEMENT)
        source["optional"] = True
        flat = flatten_element(ElementFactory.from_json(source))

        # "items" maps to a (dict, optional) pair
        item_rules, item_rules_optional = flat.get("items")
        self.assertIsInstance(item_rules, dict)
        self.assertEqual(item_rules_optional, True)

        # every rule inside the dict is a (type, optional) pair
        for key, want_type, want_optional in [("count", int, True)]:
            entry = item_rules.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #5
0
    def test_all_child_single_optional(self):
        """
        An optional SingleElement nested in ALL_ELEMENT contributes optional
        "title" and "url" rules, while "items" and "count" stay non-optional.
        """
        parent_json = copy.deepcopy(ALL_ELEMENT)
        child_json = copy.deepcopy(SINGLE_ELEMENT)
        parent_json["children"].append(child_json)
        child_json["optional"] = True
        flat = flatten_element(ElementFactory.from_json(parent_json))

        # "items" maps to a (dict, optional) pair; not optional here
        item_rules, item_rules_optional = flat.get("items")
        self.assertIsInstance(item_rules, dict)
        self.assertEqual(item_rules_optional, False)

        wanted = (
            ("count", int, False),
            ("title", str, True),
            ("url", str, True),
        )
        for key, want_type, want_optional in wanted:
            entry = item_rules.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #6
0
 def test_children_data(self):
     """
     Data extracted by the SINGLE_JSON element from a small fragment
     contains both the "link" and "headline" keys.
     """
     element = ElementFactory.from_json(SINGLE_JSON)
     markup = '<body><div><a href="#">Test</a></div></body>'
     fragment = html.fragment_fromstring(markup)
     extracted = element.data(fragment)
     for key in ("link", "headline"):
         self.assertIn(key, extracted)
Пример #7
0
    def test_all_child_single(self):
        """
        A SingleElement nested within an AllElement (or RangeElement) has
        all of its rules added to the dict of the AllElement's rules, so
        each item is expected to carry count, title and url.
        """
        parent_json = copy.deepcopy(ALL_ELEMENT)
        parent_json["children"].append(copy.deepcopy(SINGLE_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(parent_json))

        nightfall = {
            "count": 7,
            "title": "Nightfall",
            "url": "http://www.isaacasimov.com/nightfall",
        }
        foundation = {
            "count": 12,
            "title": "Foundation",
            "url": "http://www.isaacasimov.com/foundation",
        }
        candidate = {"items": [nightfall, foundation]}

        self.assertTrue(compare(candidate, flat))
Пример #8
0
    def test_range_optional(self):
        """
        An optional RangeElement flattens the same way an AllElement does:
        the "items" dict and the "count" rule inside it are optional.
        """
        source = copy.deepcopy(RANGE_ELEMENT)
        source["optional"] = True
        flat = flatten_element(ElementFactory.from_json(source))

        # "items" maps to a (dict, optional) pair
        item_rules, item_rules_optional = flat.get("items")
        self.assertIsInstance(item_rules, dict)
        self.assertEqual(item_rules_optional, True)

        for key, want_type, want_optional in (("count", int, True),):
            entry = item_rules.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #9
0
    def test_all_child_all_optional(self):
        """
        With an optional ALT_ALL_ELEMENT nested in ALL_ELEMENT, outputs whose
        items have populated, empty, or absent "paragraphs" must all yield
        no differences.
        """
        outer = copy.deepcopy(ALL_ELEMENT)
        inner = copy.deepcopy(ALT_ALL_ELEMENT)
        inner["optional"] = True
        outer["children"].append(inner)
        flat = flatten_element(ElementFactory.from_json(outer))

        with_paragraphs = {
            "items": [
                {
                    "count": 6,
                    "paragraphs": [
                        {"description": "foo"},
                        {"description": "bar"},
                    ],
                }
            ]
        }
        empty_paragraphs = {"items": [{"count": 6, "paragraphs": []}]}
        no_paragraphs = {"items": [{"count": 6}]}

        for candidate in (with_paragraphs, empty_paragraphs, no_paragraphs):
            self.assertIsNone(differences(candidate, flat))
Пример #10
0
    def test_single_child_all(self):
        """
        An AllElement (or RangeElement) nested within a SingleElement yields
        a key/value pair: the AllElement's spec name mapped to a dict of its
        rules (and those of its children).
        """
        parent_json = copy.deepcopy(SINGLE_ELEMENT)
        parent_json["children"].append(copy.deepcopy(ALL_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(parent_json))

        candidate = {
            "title": "Nightfall",
            "url": "http://www.isaacasimov.com/nightfall",
            "items": [{"count": 7}, {"count": 12}],
        }

        self.assertTrue(compare(candidate, flat))
Пример #11
0
 def test_single_from_json(self):
     """
     SINGLE_JSON builds a SingleElement whose first child is also a
     SingleElement.
     """
     element = ElementFactory.from_json(SINGLE_JSON)
     self.assertIsNotNone(element)
     self.assertIsInstance(element, SingleElement)
     # the nested child must have been built as a SingleElement too
     self.assertIsInstance(element.children[0], SingleElement)
Пример #12
0
    def test_single_child_all_optional_fail(self):
        """
        Making the nested AllElement optional must not make the parent
        SingleElement's rules optional: outputs missing "title" or "url"
        are expected to fail compare().
        """
        child_json = copy.deepcopy(ALL_ELEMENT)
        parent_json = copy.deepcopy(SINGLE_ELEMENT)
        parent_json["children"].append(child_json)
        child_json["optional"] = True
        flat = flatten_element(ElementFactory.from_json(parent_json))

        missing_title = {
            "url": "http://www.isaacasimov.com/nightfall",
            "items": [{"count": 7}, {"count": 12}],
        }
        missing_url = {
            "title": "Nightfall",
            "items": [{"count": 7}, {"count": 12}],
        }

        for candidate in (missing_title, missing_url):
            self.assertFalse(compare(candidate, flat))
Пример #13
0
    def test_all_child_single_optional_fail(self):
        """
        An optional Element makes its own rules and those of all of its
        descendant Elements optional.

        With an optional SingleElement nested in ALL_ELEMENT, differences()
        must still report something for an item carrying only title/url and
        for an empty output.
        """
        outer = copy.deepcopy(ALL_ELEMENT)
        nested = copy.deepcopy(SINGLE_ELEMENT)
        nested["optional"] = True
        outer["children"].append(nested)
        flat = flatten_element(ElementFactory.from_json(outer))

        title_and_url_only = {
            "items": [{
                "title": "Foundation",
                "url": "http://www.isaacasimov.com/foundation",
            }]
        }
        for candidate in (title_and_url_only, {}):
            self.assertIsNotNone(differences(candidate, flat))
Пример #14
0
    def test_all_child_single(self):
        """
        A SingleElement nested within an AllElement (or RangeElement) has
        all of its rules added to the dict of the AllElement's rules, so
        items carrying count, title and url must show no differences.
        """
        outer = copy.deepcopy(ALL_ELEMENT)
        outer["children"].append(copy.deepcopy(SINGLE_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(outer))

        nightfall = {
            "count": 7,
            "title": "Nightfall",
            "url": "http://www.isaacasimov.com/nightfall",
        }
        foundation = {
            "count": 12,
            "title": "Foundation",
            "url": "http://www.isaacasimov.com/foundation",
        }
        candidate = {"items": [nightfall, foundation]}

        self.assertIsNone(differences(candidate, flat))
Пример #15
0
    def test_single_child_all_optional_fail(self):
        """
        Making the nested AllElement optional must not make the parent
        SingleElement's rules optional: outputs missing "title" or "url"
        are expected to produce differences.
        """
        nested = copy.deepcopy(ALL_ELEMENT)
        outer = copy.deepcopy(SINGLE_ELEMENT)
        outer["children"].append(nested)
        nested["optional"] = True
        flat = flatten_element(ElementFactory.from_json(outer))

        missing_title = {
            "url": "http://www.isaacasimov.com/nightfall",
            "items": [{"count": 7}, {"count": 12}],
        }
        missing_url = {
            "title": "Nightfall",
            "items": [{"count": 7}, {"count": 12}],
        }

        for candidate in (missing_title, missing_url):
            self.assertIsNotNone(differences(candidate, flat))
Пример #16
0
    def test_single_child_all(self):
        """
        An AllElement (or RangeElement) nested within a SingleElement yields
        a key/value pair: the AllElement's spec name mapped to a dict of its
        rules (and those of its children).
        """
        outer = copy.deepcopy(SINGLE_ELEMENT)
        outer["children"].append(copy.deepcopy(ALL_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(outer))

        candidate = {
            "title": "Nightfall",
            "url": "http://www.isaacasimov.com/nightfall",
            "items": [{"count": 7}, {"count": 12}],
        }

        self.assertIsNone(differences(candidate, flat))
Пример #17
0
    def test_range_optional(self):
        """
        An optional RangeElement flattens the same way an AllElement does:
        the "items" dict and the "count" rule inside it are optional.
        """
        range_json = copy.deepcopy(RANGE_ELEMENT)
        range_json["optional"] = True
        flat = flatten_element(ElementFactory.from_json(range_json))

        # "items" maps to a (dict, optional) pair
        rules, rules_optional = flat.get("items")
        self.assertIsInstance(rules, dict)
        self.assertEqual(rules_optional, True)

        for key, want_type, want_optional in [("count", int, True)]:
            entry = rules.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #18
0
 def test_all_child_single_fail(self):
     """
     A SingleElement nested within an AllElement (or RangeElement) has
     all of its rules added to the dict of the AllElement's rules.

     Outputs with a wrongly typed "count", a missing "count", a missing
     "title", or no content at all must therefore produce differences.
     """
     outer = copy.deepcopy(ALL_ELEMENT)
     outer["children"].append(copy.deepcopy(SINGLE_ELEMENT))
     flat = flatten_element(ElementFactory.from_json(outer))

     count_is_string = {
         "items": [{
             "count": "12",
             "title": "Foundation",
             "url": "http://www.isaacasimov.com/foundation",
         }]
     }
     missing_count = {
         "items": [{
             "title": "Foundation",
             "url": "http://www.isaacasimov.com/foundation",
         }]
     }
     missing_title = {
         "items": [{
             "count": 12,
             "url": "http://www.isaacasimov.com/foundation",
         }]
     }

     for candidate in (count_is_string, missing_count, missing_title, {}):
         self.assertIsNotNone(differences(candidate, flat))
Пример #19
0
    def test_all_optional(self):
        """
        Flattening an optional AllElement marks its "items" dict as
        optional, along with the rules contained in that dict.
        """
        all_json = copy.deepcopy(ALL_ELEMENT)
        all_json["optional"] = True
        flat = flatten_element(ElementFactory.from_json(all_json))

        # "items" maps to a (dict, optional) pair
        rules, rules_optional = flat.get("items")
        self.assertIsInstance(rules, dict)
        self.assertEqual(rules_optional, True)

        wanted = (
            ("count", int, True),
        )
        for key, want_type, want_optional in wanted:
            entry = rules.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #20
0
    def test_all_child_all_optional_fail(self):
        """
        With an optional ALT_ALL_ELEMENT nested in ALL_ELEMENT, an item
        holding only "paragraphs" and an empty output must both produce
        differences.
        """
        outer = copy.deepcopy(ALL_ELEMENT)
        nested = copy.deepcopy(ALT_ALL_ELEMENT)
        nested["optional"] = True
        outer["children"].append(nested)
        flat = flatten_element(ElementFactory.from_json(outer))

        paragraphs_only = {
            "items": [{
                "paragraphs": [
                    {"description": "foo"},
                    {"description": "bar"},
                ]
            }]
        }

        for candidate in (paragraphs_only, {}):
            self.assertIsNotNone(differences(candidate, flat))
Пример #21
0
    def test_all_child_single_optional(self):
        """
        An optional SingleElement nested in ALL_ELEMENT contributes optional
        "title" and "url" rules, while "items" and "count" stay non-optional.
        """
        outer = copy.deepcopy(ALL_ELEMENT)
        nested = copy.deepcopy(SINGLE_ELEMENT)
        outer["children"].append(nested)
        nested["optional"] = True
        flat = flatten_element(ElementFactory.from_json(outer))

        # "items" maps to a (dict, optional) pair; not optional here
        rules, rules_optional = flat.get("items")
        self.assertIsInstance(rules, dict)
        self.assertEqual(rules_optional, False)

        wanted = (
            ("count", int, False),
            ("title", str, True),
            ("url", str, True),
        )
        for key, want_type, want_optional in wanted:
            entry = rules.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #22
0
 def test_single_from_json(self):
     """
     SINGLE_JSON builds a SingleElement whose first child is also a
     SingleElement.
     """
     built = ElementFactory.from_json(SINGLE_JSON)
     self.assertIsNotNone(built)
     self.assertIsInstance(built, SingleElement)
     # check the type of the nested child as well
     self.assertIsInstance(built.children[0], SingleElement)
Пример #23
0
    def test_all_element_fail(self):
        """
        An item lacking "count" must produce differences against the
        flattened ALL_ELEMENT.
        """
        element = ElementFactory.from_json(copy.deepcopy(ALL_ELEMENT))
        flat = flatten_element(element)

        candidate = {
            "items": [
                {"count": 7},
                {},
            ]
        }
        self.assertIsNotNone(differences(candidate, flat))
Пример #24
0
    def test_all_child_all_optional_fail(self):
        """
        With an optional ALT_ALL_ELEMENT nested in ALL_ELEMENT, an item
        holding only "paragraphs" and an empty output must both fail
        compare().
        """
        parent_json = copy.deepcopy(ALL_ELEMENT)
        child_json = copy.deepcopy(ALT_ALL_ELEMENT)
        child_json["optional"] = True
        parent_json["children"].append(child_json)
        flat = flatten_element(ElementFactory.from_json(parent_json))

        paragraphs_only = {
            "items": [{
                "paragraphs": [
                    {"description": "foo"},
                    {"description": "bar"},
                ]
            }]
        }

        for candidate in (paragraphs_only, {}):
            self.assertFalse(compare(candidate, flat))
Пример #25
0
    def test_single_element(self):
        """
        An output with both "title" and "url" passes compare() against the
        flattened SINGLE_ELEMENT.
        """
        element = ElementFactory.from_json(copy.deepcopy(SINGLE_ELEMENT))
        flat = flatten_element(element)

        candidate = {
            "title": "Nightfall",
            "url": "http://www.isaacasimov.com/nightfall",
        }
        self.assertTrue(compare(candidate, flat))
Пример #26
0
    def test_single_element(self):
        """
        An output with both "title" and "url" produces no differences
        against the flattened SINGLE_ELEMENT.
        """
        source = copy.deepcopy(SINGLE_ELEMENT)
        flat = flatten_element(ElementFactory.from_json(source))

        candidate = {
            "title": "Nightfall",
            "url": "http://www.isaacasimov.com/nightfall",
        }
        self.assertIsNone(differences(candidate, flat))
Пример #27
0
    def test_all_element(self):
        """
        An output whose items each carry a "count" produces no differences
        against the flattened ALL_ELEMENT.
        """
        source = copy.deepcopy(ALL_ELEMENT)
        flat = flatten_element(ElementFactory.from_json(source))

        candidate = {"items": [{"count": value} for value in (7, 12)]}
        self.assertIsNone(differences(candidate, flat))
Пример #28
0
    def test_range_element_fail(self):
        """
        An item lacking "count" must produce differences against the
        flattened RANGE_ELEMENT.
        """
        source = copy.deepcopy(RANGE_ELEMENT)
        flat = flatten_element(ElementFactory.from_json(source))

        candidate = {"items": [{"count": 7}, {}]}
        self.assertIsNotNone(differences(candidate, flat))
Пример #29
0
    def test_single_child_single_optional_fail(self):
        """
        With an optional ALT_SINGLE_ELEMENT nested in SINGLE_ELEMENT, an
        output that has "title" and "text" but no "url" must still produce
        differences.
        """
        parent_json = copy.deepcopy(SINGLE_ELEMENT)
        child_json = copy.deepcopy(ALT_SINGLE_ELEMENT)
        parent_json["children"].append(child_json)
        child_json["optional"] = True
        flat = flatten_element(ElementFactory.from_json(parent_json))

        missing_url = {
            "title": "The Dark Forest",
            "text": "The Second Novel",
        }

        for candidate in (missing_url,):
            self.assertIsNotNone(differences(candidate, flat))
Пример #30
0
    def test_all_element(self):
        """
        An output whose items each carry a "count" passes compare() against
        the flattened ALL_ELEMENT.
        """
        element = ElementFactory.from_json(copy.deepcopy(ALL_ELEMENT))
        flat = flatten_element(element)

        candidate = {"items": [{"count": value} for value in (7, 12)]}
        self.assertTrue(compare(candidate, flat))
Пример #31
0
    def test_range_element_fail(self):
        """
        An item lacking "count" must fail compare() against the flattened
        RANGE_ELEMENT.
        """
        element = ElementFactory.from_json(copy.deepcopy(RANGE_ELEMENT))
        flat = flatten_element(element)

        candidate = {"items": [{"count": 7}, {}]}
        self.assertFalse(compare(candidate, flat))
Пример #32
0
    def test_optional_range_element(self):
        """
        An optional RangeElement makes its dict, its rules and its
        children's rules optional, so an empty output passes compare().
        """
        source = copy.deepcopy(RANGE_ELEMENT)
        source["optional"] = True
        flat = flatten_element(ElementFactory.from_json(source))

        nothing_extracted = {}
        self.assertTrue(compare(nothing_extracted, flat))
Пример #33
0
    def test_optional_range_element(self):
        """
        An optional RangeElement makes its dict, its rules and its
        children's rules optional, so an empty output has no differences.
        """
        range_json = copy.deepcopy(RANGE_ELEMENT)
        range_json["optional"] = True
        flat = flatten_element(ElementFactory.from_json(range_json))

        nothing_extracted = {}
        self.assertIsNone(differences(nothing_extracted, flat))
Пример #34
0
    def test_optional_all_element(self):
        """
        An optional AllElement makes its dict, its rules and its children's
        rules optional, so an empty output has no differences.
        """
        all_json = copy.deepcopy(ALL_ELEMENT)
        all_json["optional"] = True
        flat = flatten_element(ElementFactory.from_json(all_json))

        nothing_extracted = {}
        self.assertIsNone(differences(nothing_extracted, flat))
Пример #35
0
    def test_single_child_single(self):
        """
        With ALT_SINGLE_ELEMENT nested in SINGLE_ELEMENT, a flat output that
        has "title", "url" and "text" passes compare().
        """
        parent_json = copy.deepcopy(SINGLE_ELEMENT)
        parent_json["children"].append(copy.deepcopy(ALT_SINGLE_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(parent_json))

        candidate = {
            "title": "The Dark Forest",
            "url": "http://www.liucixin.com/the_dark_forest",
            "text": "The Second Novel",
        }

        self.assertTrue(compare(candidate, flat))
Пример #36
0
    def test_single_child_single(self):
        """
        With ALT_SINGLE_ELEMENT nested in SINGLE_ELEMENT, a flat output that
        has "title", "url" and "text" produces no differences.
        """
        outer = copy.deepcopy(SINGLE_ELEMENT)
        outer["children"].append(copy.deepcopy(ALT_SINGLE_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(outer))

        candidate = {
            "title": "The Dark Forest",
            "url": "http://www.liucixin.com/the_dark_forest",
            "text": "The Second Novel",
        }

        self.assertIsNone(differences(candidate, flat))
Пример #37
0
    def test_single(self):
        """
        Flattening SINGLE_ELEMENT yields non-optional str rules for "url"
        and "title", each stored as a (type, optional) pair.
        """
        source = copy.deepcopy(SINGLE_ELEMENT)
        flat = flatten_element(ElementFactory.from_json(source))

        wanted = (
            ("url", str, False),
            ("title", str, False),
        )
        for key, want_type, want_optional in wanted:
            entry = flat.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #38
0
    def test_single_element_fail(self):
        """
        Outputs missing "url", missing "title", carrying a non-string
        "url", or empty must all produce differences against the flattened
        SINGLE_ELEMENT.
        """
        source = copy.deepcopy(SINGLE_ELEMENT)
        flat = flatten_element(ElementFactory.from_json(source))

        title_only = {"title": "Nightfall"}
        url_only = {"url": "http://www.isaacasimov.com/nightfall"}
        url_is_int = {"title": "Nightfall", "url": 7}

        for candidate in (title_only, url_only, url_is_int, {}):
            self.assertIsNotNone(differences(candidate, flat))
Пример #39
0
 def test_rule_no_attr(self):
     """
     An element that returns None is filtered out of the result; for this
     fragment the RANGE_JSON element is expected to yield one div.
     """
     markup = """
     <section>
         <div><a href=\"#\">Test</a></div>
         <div><a>Test</a></div>
         <div><a href=\"#\">Test</a></div>
         <div><a href=\"#\">Test</a></div>
     </section>
     """
     element = ElementFactory.from_json(RANGE_JSON)
     extracted = element.data(html.fragment_fromstring(markup))
     found = extracted.get("divs")
     self.assertEqual(len(found), 1)
Пример #40
0
 def test_child_element_doesnt_exist(self):
     """
     Data for a div whose child element doesn't exist is filtered out:
     three of the four divs in this fragment are expected to remain.
     """
     markup = """
     <section>
         <div><a href=\"#foo\">Foo</a></div>
         <div></div>
         <div><a href=\"#baz\">Baz</a></div>
         <div><a href=\"#quux\">Quux</a></div>
     </section>
     """
     element = ElementFactory.from_json(ALL_JSON)
     extracted = element.data(html.fragment_fromstring(markup))
     found = extracted.get("divs")
     self.assertEqual(len(found), 3)
Пример #41
0
    def test_single_child_single_optional_fail(self):
        """
        With an optional ALT_SINGLE_ELEMENT nested in SINGLE_ELEMENT, an
        output that has "title" and "text" but no "url" must still produce
        differences.
        """
        outer = copy.deepcopy(SINGLE_ELEMENT)
        nested = copy.deepcopy(ALT_SINGLE_ELEMENT)
        outer["children"].append(nested)
        nested["optional"] = True
        flat = flatten_element(ElementFactory.from_json(outer))

        missing_url = {"title": "The Dark Forest", "text": "The Second Novel"}

        for candidate in (missing_url,):
            self.assertIsNotNone(differences(candidate, flat))
Пример #42
0
    def test_single_child_single_fail(self):
        """
        With a non-optional ALT_SINGLE_ELEMENT nested in SINGLE_ELEMENT,
        outputs missing "text" or missing "title" must produce differences.
        """
        outer = copy.deepcopy(SINGLE_ELEMENT)
        outer["children"].append(copy.deepcopy(ALT_SINGLE_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(outer))

        missing_text = {
            "title": "The Dark Forest",
            "url": "http://www.liucixin.com/the_dark_forest",
        }
        missing_title = {
            "url": "http://www.liucixin.com/the_dark_forest",
            "text": "The Second Novel",
        }

        for candidate in (missing_text, missing_title):
            self.assertIsNotNone(differences(candidate, flat))
Пример #43
0
 def test_all_data_doesnt_exist(self):
     """
     When all matched elements return None, "divs" is an empty list.
     """
     markup = """
     <section>
         <div></div>
         <div></div>
         <div><a href=\"#\">Test</a></div>
         <div><a href=\"#\">Test</a></div>
     </section>
     """
     element = ElementFactory.from_json(RANGE_JSON)
     extracted = element.data(html.fragment_fromstring(markup))
     found = extracted.get("divs")
     self.assertIsInstance(found, list)
     self.assertEqual(len(found), 0)
Пример #44
0
 def test_all_data_none(self):
     """
     When every element returns None, "divs" is an empty list.
     """
     markup = """
     <section>
         <div><a>Foo</a></div>
         <div><a>Bar</a></div>
         <div><a>Baz</a></div>
         <div><a>Quux</a></div>
     </section>
     """
     element = ElementFactory.from_json(ALL_JSON)
     extracted = element.data(html.fragment_fromstring(markup))
     found = extracted.get("divs")
     self.assertIsInstance(found, list)
     self.assertEqual(len(found), 0)
Пример #45
0
    def test_all_optional_child_all(self):
        """
        Marking the outer AllElement optional makes "items", its "count"
        rule, the nested "paragraphs" dict and its "description" rule all
        optional in the flattened result.
        """
        outer = copy.deepcopy(ALL_ELEMENT)
        nested = copy.deepcopy(ALT_ALL_ELEMENT)
        outer["children"].append(nested)
        outer["optional"] = True
        flat = flatten_element(ElementFactory.from_json(outer))

        def check_rules(rules, wanted):
            # each rule is stored as a (type, optional) pair
            for key, want_type, want_optional in wanted:
                entry = rules.get(key)
                self.assertIsNotNone(entry)

                got_type, got_optional = entry
                self.assertIs(got_type, want_type)
                self.assertEqual(got_optional, want_optional)

        # "items" maps to a (dict, True) pair
        item_rules, item_rules_optional = flat.get("items")
        self.assertIsInstance(item_rules, dict)
        self.assertEqual(item_rules_optional, True)
        check_rules(item_rules, [("count", int, True)])

        # "paragraphs" sits inside the items dict, also as a (dict, True) pair
        paragraph_rules, paragraph_rules_optional = item_rules.get("paragraphs")
        self.assertIsInstance(paragraph_rules, dict)
        self.assertEqual(paragraph_rules_optional, True)
        check_rules(paragraph_rules, [("description", str, True)])
Пример #46
0
 def test_child_element_doesnt_exist(self):
     """
     Data for a div whose child element doesn't exist is filtered out:
     three of the four divs in this fragment are expected to remain.
     """
     markup = """
     <section>
         <div><a href=\"#foo\">Foo</a></div>
         <div></div>
         <div><a href=\"#baz\">Baz</a></div>
         <div><a href=\"#quux\">Quux</a></div>
     </section>
     """
     element = ElementFactory.from_json(ALL_JSON)
     fragment = html.fragment_fromstring(markup)
     found = element.data(fragment).get("divs")
     self.assertEqual(len(found), 3)
Пример #47
0
 def test_rule_no_attr(self):
     """
     An element that returns None is filtered out of the result; for this
     fragment the RANGE_JSON element is expected to yield one div.
     """
     markup = """
     <section>
         <div><a href=\"#\">Test</a></div>
         <div><a>Test</a></div>
         <div><a href=\"#\">Test</a></div>
         <div><a href=\"#\">Test</a></div>
     </section>
     """
     element = ElementFactory.from_json(RANGE_JSON)
     fragment = html.fragment_fromstring(markup)
     found = element.data(fragment).get("divs")
     self.assertEqual(len(found), 1)
Пример #48
0
 def test_children_data(self):
     """
     Child data is merged into the datum for each element: every entry
     under "divs" must contain both "link" and "headline".
     """
     markup = """
     <section>
         <div><a href=\"#\">Test</a></div>
         <div><a>Test</a></div>
         <div><a href=\"#\">Test</a></div>
         <div><a href=\"#\">Test</a></div>
     </section>
     """
     element = ElementFactory.from_json(RANGE_JSON)
     extracted = element.data(html.fragment_fromstring(markup))
     self.assertIn("divs", extracted)
     for datum in extracted.get("divs"):
         self.assertIn("link", datum)
         self.assertIn("headline", datum)
Пример #49
0
    def test_single(self):
        """
        Flattening SINGLE_ELEMENT yields non-optional str rules for "url"
        and "title", each stored as a (type, optional) pair.
        """
        element = ElementFactory.from_json(copy.deepcopy(SINGLE_ELEMENT))
        flat = flatten_element(element)

        for key, want_type, want_optional in (
            ("url", str, False),
            ("title", str, False),
        ):
            entry = flat.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #50
0
 def test_all_data_none(self):
     """
     if every matched element yields None, the value stored under
     "divs" is still a list, just an empty one
     """
     markup = """
     <section>
         <div><a>Foo</a></div>
         <div><a>Bar</a></div>
         <div><a>Baz</a></div>
         <div><a>Quux</a></div>
     </section>
     """
     element = ElementFactory.from_json(ALL_JSON)
     section = html.fragment_fromstring(markup)
     matched = element.data(section).get("divs")
     self.assertIsInstance(matched, list)
     self.assertEqual(len(matched), 0)
Пример #51
0
 def test_all_data_doesnt_exist(self):
     """
     an empty list is returned under "divs" when all of the
     matched elements return None
     """
     element = ElementFactory.from_json(RANGE_JSON)
     section = html.fragment_fromstring("""
     <section>
         <div></div>
         <div></div>
         <div><a href=\"#\">Test</a></div>
         <div><a href=\"#\">Test</a></div>
     </section>
     """)
     result = element.data(section)
     matched = result.get("divs")
     self.assertIsInstance(matched, list)
     self.assertEqual(len(matched), 0)
Пример #52
0
 def test_children_data(self):
     """
     data produced by child elements ends up inside the datum of
     each element: every entry of "divs" has "link" and "headline"
     """
     markup = """
     <section>
         <div><a href=\"#\">Test</a></div>
         <div><a>Test</a></div>
         <div><a href=\"#\">Test</a></div>
         <div><a href=\"#\">Test</a></div>
     </section>
     """
     element = ElementFactory.from_json(RANGE_JSON)
     result = element.data(html.fragment_fromstring(markup))
     self.assertIn("divs", result)
     for datum in result.get("divs"):
         for key in ("link", "headline"):
             self.assertIn(key, datum)
Пример #53
0
    def test_optional_single_element(self):
        """
        with SINGLE_ELEMENT marked optional, an output may contain both
        keys, only one of them, or nothing at all without any difference
        being reported
        """
        spec = copy.deepcopy(SINGLE_ELEMENT)
        spec["optional"] = True
        flat = flatten_element(ElementFactory.from_json(spec))

        both_keys = {
            "title": "Nightfall",
            "url": "http://www.isaacasimov.com/nightfall"
        }
        url_only = {"url": "http://www.isaacasimov.com/nightfall"}
        title_only = {"title": "Nightfall"}
        nothing = {}

        for candidate in (both_keys, url_only, title_only, nothing):
            self.assertIsNone(differences(candidate, flat))
Пример #54
0
 def test_children_data(self):
     """
     each element's datum includes the data gathered from its child
     elements, so "link" and "headline" appear in every "divs" entry
     """
     element = ElementFactory.from_json(ALL_JSON)
     section = html.fragment_fromstring("""
     <section>
         <div><a href=\"#foo\">Foo</a></div>
         <div><a href=\"#bar\">Bar</a></div>
         <div><a href=\"#baz\">Baz</a></div>
         <div><a href=\"#quux\">Quux</a></div>
     </section>
     """)
     result = element.data(section)
     self.assertIn("divs", result)
     for datum in result.get("divs"):
         for key in ("link", "headline"):
             self.assertIn(key, datum)
Пример #55
0
    def test_single_child_single_optional(self):
        """
        an optional single child of a single element: the parent's
        "url" and "title" rules stay required while the child's "text"
        rule is flattened as optional
        """
        parent_spec = copy.deepcopy(SINGLE_ELEMENT)
        child_spec = copy.deepcopy(ALT_SINGLE_ELEMENT)
        # the very same dict object is appended, so flagging it before
        # or after the append makes no difference
        child_spec["optional"] = True
        parent_spec["children"].append(child_spec)
        flat = flatten_element(ElementFactory.from_json(parent_spec))

        # (rule name, expected type, expected optional flag)
        expectations = (
            ("url", str, False),
            ("title", str, False),
            ("text", str, True),
        )

        for key, want_type, want_optional in expectations:
            entry = flat.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #56
0
    def test_all(self):
        """
        flattening ALL_ELEMENT gives a non-optional dict under "items"
        whose "count" rule is a required int
        """
        element = ElementFactory.from_json(copy.deepcopy(ALL_ELEMENT))
        flat = flatten_element(element)

        # expected shape of the entry: ("items", dict, False)
        nested, nested_optional = flat.get("items")
        self.assertIsInstance(nested, dict)
        self.assertEqual(nested_optional, False)

        # (rule name, expected type, expected optional flag)
        expectations = (
            ("count", int, False),
        )

        for key, want_type, want_optional in expectations:
            entry = nested.get(key)
            self.assertIsNotNone(entry)

            got_type, got_optional = entry
            self.assertIs(got_type, want_type)
            self.assertEqual(got_optional, want_optional)
Пример #57
0
    def test_all_child_all(self):
        """
        an "all" element nested inside another "all" element: an output
        with a list of "paragraphs" inside each "items" entry must show
        no differences against the flattened rules
        """
        outer_spec = copy.deepcopy(ALL_ELEMENT)
        outer_spec["children"].append(copy.deepcopy(ALT_ALL_ELEMENT))
        flat = flatten_element(ElementFactory.from_json(outer_spec))

        paragraphs = [
            {"description": "foo"},
            {"description": "bar"},
        ]
        candidate = {
            "items": [
                {"count": 6, "paragraphs": paragraphs},
            ]
        }

        self.assertIsNone(differences(candidate, flat))
Пример #58
0
 def test_empty_from_json(self):
     # an Element with neither rules nor children is empty,
     # so building one from JSON has to raise ValueError
     spec = {
         "type": "single",
         "index": 0,
     }
     empty_json = {
         "selector": "div",
         "children": [],
         "rules": [],
         "spec": spec,
     }
     with self.assertRaises(ValueError):
         ElementFactory.from_json(empty_json)
Пример #59
0
 def test_optional_from_json(self):
     """
     an "optional" flag set in the JSON shows up on the created element
     """
     # shallow copy: only a top-level key is added, so the shared
     # SINGLE_JSON fixture itself is left untouched
     optional_json = SINGLE_JSON.copy()
     optional_json["optional"] = True
     element = ElementFactory.from_json(optional_json)
     self.assertTrue(element.optional)
Пример #60
0
 def test_creates_rule_and_child_objects(self):
     e = ElementFactory.from_json(SINGLE_JSON)
     self.assertEqual(len(e.rules), 1)
     self.assertIsInstance(e.rules[0], Rule)
     self.assertEqual(e.spec.get("type"), "single")
     self.assertEqual(e.spec.get("index"), 0)