示例#1
0
文件: tests.py 项目: xecgr/kafka
    def test_01_insert_init_data(self):
        """Save every sample domain, then check each one is returned by a query."""
        pending = dict(domains)
        for domain_name, domain_url in pending.items():
            Domain(domain=domain_name, url=domain_url).save()

        # Strike off each name that comes back from storage; whatever is
        # left in ``pending`` was not returned by the query.
        for stored in Domain.get_by_filters():
            pending.pop(stored.domain, None)
        self.assertEqual(pending, {})
示例#2
0
文件: tests.py 项目: xecgr/kafka
 def test_03_regexp(self):
     """Run a domain check's regexp on a saved snapshot and verify the stored value."""
     domain = Domain(domain='dummy', url='dummy')
     domain.save()

     snapshot = Snapshot(
         domain_id=domain.id,
         pulled_at=datetime.utcnow(),
         html="find me with a regexp!!",
     )
     snapshot.save()

     check = DomainCheck(
         domain_id=domain.id,
         name="dummy_check",
         regexp="find (me|you)",
     )
     check.save()

     run_regexp(snapshot_id=snapshot.id)

     # The first record stored for this check should hold the JSON-encoded
     # list with the single captured group.
     results = SnapshotCheckData.get_by_filters(check_id=check.id)
     stored_value = json.loads(results[0].check_value)
     self.assertEqual(stored_value, ["me"])
示例#3
0
if __name__ == "__main__":
    # Domain name -> URL saved as a Domain row below.
    # NOTE(review): the "fail!" entry looks like a deliberately invalid URL
    # meant to exercise the failure path - confirm.
    domains = {
        "helsinkitimes": "https://www.helsinkitimes.fi/",
        "berlin": "https://www.berlin.de/en/news/",
        "9news": "https://www.9news.com.au/sydney",
        "fail!": "fail://fail.com",
    }
    # Domain name -> regexps; each regexp becomes one DomainCheck row.
    domain_checks = {
        "helsinkitimes": ["covid(\\d+) ", "govern(\\w+)"],
    }

    # delete_tables() runs before create_tables() - presumably this resets
    # the schema on every run; verify against their definitions.
    delete_tables()
    create_tables()

    domain_ids = []
    for name, target_url in domains.items():
        domain = Domain(domain=name, url=target_url)
        domain.save()
        domain_ids.append(domain.id)

        # Domains without an entry in ``domain_checks`` get no checks.
        patterns = domain_checks.get(name, [])
        for idx, pattern in enumerate(patterns):
            check = DomainCheck(
                domain_id=domain.id,
                name=f"{name}-{idx}",
                regexp=pattern,
            )
            check.save()

    # Each round sends one collector task per saved domain, then sleeps.
    for _ in range(n_pulls):
        for d_id in domain_ids:
            print(f"sending collector task for {d_id}")
            send_task(topic=collector_topic, domain_id=d_id)
        time.sleep(sleep_between_pulls)