Exemplo n.º 1
0
    def test_drop_duplicate(self):
        """Run DropDuplicate against a storage seeded with ``self.items``.

        The pipeline must return ``None`` for the item with id "123" and
        hand back the very same object for the item with id "789".
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = FileStorage(Path(tmp_dir))
            storage.insert_many("test", self.items)

            dedupe = DropDuplicate(storage, "test")

            # Presumably "123" is already part of self.items (the fixture is
            # not visible here), so the pipeline is expected to drop it.
            known = Item("123", {"x": 123})
            assert dedupe(known) is None

            # "789" is expected to pass through as the identical object.
            unseen = Item("789", {"x": 789})
            assert dedupe(unseen) is unseen
Exemplo n.º 2
0
    def test_overwrite_true():
        """Store two items with the same id using ``overwrite=True``.

        After both writes, the value read back from storage must be the one
        carried by the second item.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = FileStorage(Path(tmp_dir))
            store = StoreItem(storage, "test", overwrite=True)

            # Push the same id twice, each time with a different payload.
            for value in (456, 789):
                store(Item("123", {"x": value}))

            persisted = storage.get("test", "123")
            assert persisted["x"] == 789
Exemplo n.º 3
0
    def test_update(self):
        """Insert ``self.data``, update both ids, and read the new text back."""
        # Maps each item id to the text it should hold after the update.
        expected = {"123": "xfoo", "456": "xbar"}

        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = FileStorage(Path(tmp_dir))
            storage.insert_many("test", self.data)

            replacements = [
                Item(item_id, {"text": text})
                for item_id, text in expected.items()
            ]
            storage.update_many("test", replacements)

            for item_id, text in expected.items():
                assert storage.get("test", item_id)["text"] == text
Exemplo n.º 4
0
def test_item():
    """Check Item's id attribute, length, and nested item assignment."""
    record = Item("123", {})

    record["foo"] = {"x": 123}
    record["bar"] = {"y": 456}
    # Mutate the nested value through a lookup on the item itself.
    record["bar"]["y"] = 789

    assert record.id == "123"
    assert len(record) == 2
    assert record["foo"]["x"] == 123
    assert record["bar"]["y"] == 789
Exemplo n.º 5
0
 def setup(self):
     """Create the single Item fixture stored on ``self.item``."""
     # pylint:disable=attribute-defined-outside-init
     payload = {"x": 456}
     self.item = Item("123", payload)
Exemplo n.º 6
0
 def _get_item(self, response: httpx.Response) -> Item:
     """Build an Item from a response.

     The item id is the stringified response URL; the content is a dict
     whose ``"text"`` entry comes from ``self._get_text(response)``.
     """
     return Item(str(response.url), {"text": self._get_text(response)})
Exemplo n.º 7
0
 def _build_item(video_dict: tp.Dict[str, tp.Any]) -> Item:
     """Wrap a video dict in an Item keyed by its ``"id"`` entry.

     The dict itself (not a copy) is passed as the item's content.
     """
     return Item(video_dict["id"], video_dict)
Exemplo n.º 8
0
 def setup(self):
     """Create the two-item fixture list stored on ``self.data``."""
     # pylint:disable=attribute-defined-outside-init
     id_text_pairs = (("123", "foo"), ("456", "bar"))
     self.data = [
         Item(item_id, {"text": text}) for item_id, text in id_text_pairs
     ]
Exemplo n.º 9
0
 def task(path, taskid):
     """Insert one Item for ``taskid`` into the storage at ``path``.

     The storage is used as a context manager around the insert. The item
     id is ``str(taskid)`` and its content is ``{"task": taskid}``.

     Returns:
         The ``taskid`` that was passed in, unchanged.
     """
     with TinyDBStorage(path) as db:
         record = Item(str(taskid), {"task": taskid})
         db.insert("test", record)
     return taskid
Exemplo n.º 10
0
 def _run(self) -> tp.Iterator[Item]:
     """Yield one Item per tweet across every batch from ``self._crawl()``.

     Each Item uses the stringified tweet ``"id"`` as its id and the tweet
     object itself as its content.
     """
     # Flatten the batches lazily so tweets are still consumed one at a time.
     flattened = (entry for batch in self._crawl() for entry in batch)
     for entry in flattened:
         yield Item(str(entry["id"]), entry)