Пример #1
0
 def read(self):
     """Crawl every registered RSS feed and store any new entries.

     Returns the number of news items added, or 0 when ``is_crawl_time``
     says it is not yet time to crawl (rate limiting).
     """
     if not self.is_crawl_time():
         return 0

     feeds = Feed.select().execute()
     news_count = 0
     worker = NewsWorker()

     for feed in feeds:
         try:
             # Logging must never abort the crawl: Setting.log may be
             # unconfigured, so failures here are deliberately swallowed.
             try:
                 Setting.log.info("start crawling for " + feed.url)
             except Exception:
                 pass
             parsed = feedparser.parse(feed.url)
             for item in parsed.entries:
                 gmt_date, persian_date = self.compute_dates(
                     item.published, item.published_parsed)
                 if worker.add_news(item.title, item.link, feed.id, gmt_date):
                     news_count += 1
         except Exception as ex:
             # One broken feed must not stop the remaining feeds.
             Setting.log.log_exception(ex, 'rss reading main loop: ')

     self.set_crawl_log(news_count)
     return news_count
Пример #2
0
    def read(self):
        """Crawl every registered RSS feed and store any new entries.

        Returns the number of news items added, or 0 when ``is_crawl_time``
        says it is not yet time to crawl (rate limiting).
        """
        if not self.is_crawl_time():
            return 0

        feeds = Feed.select().execute()
        news_count = 0
        worker = NewsWorker()

        for feed in feeds:
            try:
                # Logging must never abort the crawl: Setting.log may be
                # unconfigured, so failures here are deliberately swallowed.
                try:
                    Setting.log.info("start crawling for " + feed.url)
                except Exception:
                    pass
                parsed = feedparser.parse(feed.url)
                for item in parsed.entries:
                    gmt_date, persian_date = self.compute_dates(
                        item.published, item.published_parsed)
                    if worker.add_news(item.title, item.link, feed.id, gmt_date):
                        news_count += 1
            except Exception as ex:
                # One broken feed must not stop the remaining feeds.
                Setting.log.log_exception(ex, 'rss reading main loop: ')

        self.set_crawl_log(news_count)
        return news_count