import logging

logger = logging.getLogger(__name__)


def parse_source(self, existing_ids=None):
    """Parse the Zurnal RSS feed and return (link, metadata) tuples."""
    news = []
    feed_content = get_rss(self.ZURNAL_RSS_URL)
    for feed_entry in feed_content.entries:
        link = feed_entry["link"]
        # Skip articles already stored under either ID scheme (legacy hash
        # or SHA hash of the link).
        if existing_ids and (get_hash(link) in existing_ids
                             or get_sha_hash(link) in existing_ids):
            logger.debug("Skipping %s", link)
            continue
        published_date = time_to_datetime(feed_entry["published_parsed"])
        news.append((link, {"published": published_date}))
    return news
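
# The helpers used by the parsers in this module (get_rss, get_hash,
# get_sha_hash, time_to_datetime) are not defined in this excerpt. Below is
# a minimal sketch of their assumed behaviour so the module runs standalone;
# the project's real implementations may differ (hash algorithm, timezone
# handling, HTTP layer).
import calendar
import hashlib
from datetime import datetime, timezone

import feedparser


def get_rss(url):
    # Assumed: fetch and parse the feed; the result's .entries holds the items.
    return feedparser.parse(url)


def get_hash(link):
    # Assumed: the legacy article ID scheme, an MD5 hex digest of the link.
    return hashlib.md5(link.encode("utf-8")).hexdigest()


def get_sha_hash(link):
    # Assumed: the newer article ID scheme, a SHA-256 hex digest of the link.
    return hashlib.sha256(link.encode("utf-8")).hexdigest()


def time_to_datetime(struct):
    # Assumed: feedparser's published_parsed is a UTC time.struct_time;
    # calendar.timegm keeps the conversion in UTC instead of local time.
    return datetime.fromtimestamp(calendar.timegm(struct), tz=timezone.utc)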

def parse_source(self, existing_ids=None):
    """Parse each RSS feed in self.RTV_RSS_URLS and return (link, metadata) tuples."""
    news = []
    for rss_feed in self.RTV_RSS_URLS:
        logger.debug("Parsing %s", rss_feed)
        feed_content = get_rss(rss_feed)
        for feed_entry in feed_content.entries:
            # Collect the article link from the feed entry.
            link = feed_entry["link"]
            # Skip articles already stored under either ID scheme.
            if existing_ids and (get_hash(link) in existing_ids
                                 or get_sha_hash(link) in existing_ids):
                logger.debug("Skipping %s", link)
                continue
            published_date = time_to_datetime(feed_entry["published_parsed"])
            news.append((link, {"published": published_date}))
    return news

def parse_source(self, existing_ids=None):
    """Parse the Dnevnik RSS feed and return at most 30 (link, metadata) tuples."""
    news = []
    feed_content = get_rss(self.DNEVNIK_RSS_URL)
    max_counter = 30  # cap on how many new articles to take from this feed
    for feed_entry in feed_content.entries:
        link = feed_entry["link"]
        # Skip articles already stored under either ID scheme.
        if existing_ids and (get_hash(link) in existing_ids
                             or get_sha_hash(link) in existing_ids):
            logger.debug("Skipping %s", link)
            continue
        published_date = time_to_datetime(feed_entry["published_parsed"])
        title = feed_entry["title"]
        news.append((link, {"published": published_date, "title": title}))
        max_counter -= 1
        if max_counter <= 0:
            break
    return news
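
# Hedged usage sketch. In the project each parse_source above belongs to its
# own parser class; in this flat excerpt only the last definition (the
# Dnevnik variant) is reachable by name. The feed URL and the pre-seeded ID
# set below are hypothetical stand-ins for the real configuration and
# persistence layer.
if __name__ == "__main__":
    from types import SimpleNamespace

    logging.basicConfig(level=logging.DEBUG)
    # Hypothetical feed URL; the real value lives on the parser class.
    dnevnik = SimpleNamespace(DNEVNIK_RSS_URL="https://www.dnevnik.si/rss")
    stored_ids = set()  # IDs of articles already saved; empty set skips nothing
    for link, meta in parse_source(dnevnik, existing_ids=stored_ids):
        print(meta["published"], meta["title"], link)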