def test_01_insert_init_data(self):
    """sample domain insertion test"""
    _domains = dict(domains)
    for name, url in _domains.items():
        d = Domain(domain=name, url=url)
        d.save()
    for d in Domain.get_by_filters():
        _domains.pop(d.domain, None)
    self.assertEqual(_domains, {})
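
# The tests in this section assume a thin active-record layer on top of
# SQLAlchemy: save() persists an instance and get_by_filters() queries by
# keyword arguments. The real models live elsewhere; this is a minimal sketch
# of that convention, with the in-memory engine and column layout as
# assumptions for illustration only.
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

engine = create_engine("sqlite://")  # assumed in-memory DB for the sketch
Session = sessionmaker(bind=engine, expire_on_commit=False)
Base = declarative_base()

class Domain(Base):
    __tablename__ = "domain"
    id = Column(Integer, primary_key=True)
    domain = Column(String, unique=True)
    url = Column(String)

    def save(self):
        # Persist this instance in a short-lived session.
        with Session() as session:
            session.add(self)
            session.commit()

    @classmethod
    def get_by_filters(cls, **filters):
        # Return every row matching the given column=value filters.
        with Session() as session:
            return session.query(cls).filter_by(**filters).all()

Base.metadata.create_all(engine)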
def run(self):
    """Main function for the thread: parse the buffered file and store its data in the DB."""
    # It might be better to create a process rather than a thread per file, and to
    # use a pool to process the list rather than a single thread for all of it
    # (test under a big-file environment).
    # One JSON document per line.
    my_list = self.buf_file.splitlines()
    for line in my_list:
        # Skip empty entries in the list.
        if line:
            json_line = json.loads(line)
            # Look for creative_size; if it does not exist, build it from
            # ad_width and ad_height instead.
            creative_size = find_key("creative_size", json_line)
            if not creative_size:
                value_width = find_key("ad_width", json_line)
                value_height = find_key("ad_height", json_line)
                if value_width and value_height:
                    creative_size = [value_width[0] + "x" + value_height[0]]
            # Look for the keys page_url and Referer.
            referer = find_key("Referer", json_line)
            url = find_key("page_url", json_line)
            # If all three elements were found, insert them into the DB.
            if creative_size and referer and url:
                with transaction() as session:
                    added = False
                    # Check for an existing row before inserting a duplicate.
                    if not session.query(Domain).filter(Domain.url == url[0]).first():
                        session.add(Domain(url[0]))
                        added = True
                    if not session.query(Referer).filter(Referer.url == referer[0]).first():
                        session.add(Referer(referer[0]))
                        added = True
                    session.flush()
                    # If one of the previous tables got a new row, the Information
                    # row is necessarily new too, so no existence check is needed.
                    if added:
                        session.add(Information(domain_url=url[0],
                                                referer_url=referer[0],
                                                creative_size=creative_size[0]))
                    elif not session.query(Information)\
                            .filter(Information.domain_url == url[0])\
                            .filter(Information.referer_url == referer[0])\
                            .filter(Information.creative_size == creative_size[0]).first():
                        session.add(Information(domain_url=url[0],
                                                referer_url=referer[0],
                                                creative_size=creative_size[0]))
    print("Database updated with information from file: %s" % self.f_name)
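
# run() relies on a find_key() helper that is not defined in this section.
# Based on how it is used (truthiness check, then indexing with [0]), it
# plausibly walks the parsed JSON recursively and returns a list of every
# value stored under the given key. A minimal sketch under that assumption:
def find_key(key, obj):
    """Recursively collect all values stored under `key` in nested dicts/lists."""
    found = []
    if isinstance(obj, dict):
        for k, v in obj.items():
            if k == key:
                found.append(v)
            found.extend(find_key(key, v))
    elif isinstance(obj, list):
        for item in obj:
            found.extend(find_key(key, item))
    return found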
def test_03_regexp(self):
    """regexp process test"""
    d = Domain(domain='dummy', url='dummy')
    d.save()
    s = Snapshot(
        domain_id=d.id,
        pulled_at=datetime.utcnow(),
        html="find me with a regexp!!",
    )
    s.save()
    dc = DomainCheck(domain_id=d.id, name="dummy_check", regexp="find (me|you)")
    dc.save()
    run_regexp(snapshot_id=s.id)
    scd = SnapshotCheckData.get_by_filters(check_id=dc.id)[0]
    self.assertEqual(json.loads(scd.check_value), ["me"])
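
# run_regexp() is exercised above but not defined in this section. The
# assertion implies it loads the snapshot, runs every DomainCheck regexp
# registered for that domain against the snapshot HTML, and stores the
# re.findall() result as JSON in SnapshotCheckData.check_value. A sketch under
# those assumptions (the snapshot_id column on SnapshotCheckData is assumed):
import json
import re

def run_regexp(snapshot_id):
    snapshot = Snapshot.get_by_filters(id=snapshot_id)[0]
    for check in DomainCheck.get_by_filters(domain_id=snapshot.domain_id):
        # re.findall() with one capture group yields the matched groups,
        # e.g. "find (me|you)" against the test HTML yields ["me"].
        matches = re.findall(check.regexp, snapshot.html)
        SnapshotCheckData(
            snapshot_id=snapshot.id,
            check_id=check.id,
            check_value=json.dumps(matches),
        ).save()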
if __name__ == "__main__":
    domains = {
        "helsinkitimes": "https://www.helsinkitimes.fi/",
        "berlin": "https://www.berlin.de/en/news/",
        "9news": "https://www.9news.com.au/sydney",
        "fail!": "fail://fail.com",
    }
    domain_checks = {
        "helsinkitimes": ["covid(\\d+) ", "govern(\\w+)"],
    }
    delete_tables()
    create_tables()
    domain_ids = []
    for name, url in domains.items():
        d = Domain(domain=name, url=url)
        d.save()
        domain_ids.append(d.id)
        for idx, regexp in enumerate(domain_checks.get(name, [])):
            DomainCheck(
                domain_id=d.id,
                name=f"{name}-{idx}",
                regexp=regexp,
            ).save()
    for x in range(n_pulls):
        for d_id in domain_ids:
            print(f"sending collector task for {d_id}")
            send_task(topic=collector_topic, domain_id=d_id)
        time.sleep(sleep_between_pulls)
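
# send_task() and the config values used above (collector_topic, n_pulls,
# sleep_between_pulls) are defined elsewhere in the project. As an assumption,
# send_task() serializes the task and publishes it on a topic; a broker-
# agnostic sketch using an in-process queue per topic as a stand-in for the
# real message broker:
import json
import queue
from collections import defaultdict

# Hypothetical example config values, e.g.:
#   collector_topic = "collector"
#   n_pulls = 3
#   sleep_between_pulls = 60
_topics = defaultdict(queue.Queue)

def send_task(topic, **payload):
    """Serialize the task to JSON and enqueue it on the given topic."""
    _topics[topic].put(json.dumps(payload))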