Exemplo n.º 1
0
 def download(self,feed,url,filename,extname=None):
     """Download the image at ``url`` and write it below ``self.image_dir``.

     The file is stored as
     ``<image_dir>/<1st char>/<2nd char>/<filename + extname>``, where the
     two sub-directory names are the first and second characters of the
     final file name. The directories are not created here.

     Args:
         feed: Feed name (a string). When it starts with ``'csdn'`` the
             query string is stripped from ``url`` before requesting it.
         url: Image URL.
         filename: Target file name without extension.
         extname: Extension to append to ``filename``. When ``None`` it is
             derived from the response's ``Content-Type`` header through
             ``self.get_extname``.

     Returns:
         A ``(file_fullname, state, message)`` tuple:
         ``(name, STATE_SUCCESS, 'success')`` when the file was written,
         ``(None, STATE_BADURL, ...)`` when no extension could be derived
         from the content type, and
         ``(None, STATE_NETWORK_ERROR, 'network error')`` when any
         exception was raised while downloading or saving.
     """
     if feed.startswith('csdn'):
         true_url = url.split('?')[0]
     else:
         true_url = url
     logger.info('downloading image \'%s\'',url)
     try:
         req = urllib2.Request(true_url)
         req.add_header('User-Agent',USER_AGENT)
         resp = urllib2.urlopen(req,None,DOWNLOAD_TIMEOUT)
         try:
             data = resp.read(-1)
             if extname is None:
                 # Python 2 API; the Python 3 equivalent would be
                 # resp.getheader('Content-Type').
                 header = resp.info().getheader('Content-Type')
                 # A response without a Content-Type header is treated as
                 # an unsupported type instead of crashing on None.lower().
                 content_type = header.lower() if header else ''
                 if content_type:
                     extname = self.get_extname(content_type)
         finally:
             # Release the connection even when read() fails.
             resp.close()
         if extname is None:
             logger.warning('unsupported content type \'%s\'',content_type)
             return (None, STATE_BADURL, 'unsupported content type \''+content_type+'\'')
         file_fullname = filename+extname
         fn = os.path.join(self.image_dir,file_fullname[:1],file_fullname[1:2],file_fullname)
         # The context manager closes the file even if the write fails.
         with open(fn,'wb') as f:
             f.write(data)
         return (file_fullname, STATE_SUCCESS, 'success')
     except Exception as e:
         # Best-effort boundary: every failure (network or disk) is logged
         # and reported to the caller as a network error.
         logger.error('an error accur while downloading %s',url)
         logger.exception(e)
         return (None, STATE_NETWORK_ERROR, 'network error')
Exemplo n.º 2
0
 def fetch_feed(self,feed):
     """Parse one feed and hand its entries to ``self.store``.

     ``feed`` is a mapping that provides ``'name'`` and ``'url'``. When the
     parsed feed has at least one entry, ``feed['type']`` is set to the
     first three characters of the parser-reported version and the entries
     are stored. Any exception raised while parsing or storing is logged
     and swallowed; nothing is returned.
     """
     name = feed['name']
     try:
         logger.info('start fetch feed \'%s\'', name)
         parsed = feedparser.parse(feed['url'])
         if not len(parsed.entries) > 0:
             # Nothing to store for this feed.
             logger.info('get nothing from feed \'%s\'',name)
             return
         feed['type'] = parsed.version[:3]
         logger.info('get %d articles from feed \'%s\'',len(parsed.entries),name)
         self.store(feed,parsed.entries)
     except Exception as err:
         logger.error('get error while fetching feed \'%s\'',name)
         logger.exception(err)