# Assumes module-level imports of feedparser (PyPI) and the project's own
# Feed model, NewsWorker, Setting, and encode_to_utf8 helpers.
def read(self):
    """Crawl every registered feed and store its new items.

    Returns the number of news entries actually added.
    """
    if not self.is_crawl_time():
        return 0

    urls = Feed.select().execute()
    news_count = 0
    nw = NewsWorker()
    for url in urls:
        try:
            try:
                Setting.log.info("start crawling for " + url.url)
            except Exception:
                pass  # a logging failure must not abort the crawl
            d = feedparser.parse(url.url)
            rss_title = encode_to_utf8(d['feed']['title'])  # feed title (not used below)
            for item in d.entries:
                # Derive both GMT and Persian (Jalali) timestamps for the entry.
                gmt_date, persian_date = self.compute_dates(
                    item.published, item.published_parsed)
                jalali_date, jalali_time = self.get_jalali_datetime(persian_date)
                # add_news() is expected to return True only when the item is new.
                if nw.add_news(item.title, item.link, url.id, gmt_date):
                    news_count += 1
        except Exception as ex:
            Setting.log.log_exception(ex, 'rss reading main loop: ')
    self.set_crawl_log(news_count)
    return news_count
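# A minimal usage sketch, assuming read() lives on a reader class; the
# class name "RssReader" below is hypothetical (it does not appear in this
# excerpt):
#
#   reader = RssReader()
#   added = reader.read()  # returns 0 when called outside the crawl window
#   Setting.log.info("stored %d new items" % added)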