Example #1
    def download(obj):
        # Module-level dependencies assumed here: requests, logger, gConfig,
        # UpyunUtil and writeFile are imported/defined elsewhere in the project.
        logger.debug('download url: ' + obj['url'])
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36',
                'Proxy-Connection': 'keep-alive',
                'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6',
                'Accept-Encoding': 'gzip,deflate,sdch',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Cache-Control': 'max-age=0',
                'Cookie': 'UTMPKEY=14367518; UTMPNUM=13792; UTMPUSERID=guest; LOGINTIME=1396536377'
            }
            proxies = None
            if gConfig["downloadUsingProxy"]["enabled"]:
                proxies = gConfig["downloadUsingProxy"]["proxies"]
            r = requests.get(obj['url'], headers=headers, proxies=proxies, timeout=gConfig['downloadTimeout'])
            r.raise_for_status()    # raise an exception for 4xx/5xx responses
        except requests.Timeout:
            logger.warning('download file timeout! file url: ' + obj['url'])
            return None
        except Exception as e:
            logger.exception(e)
            # namespace.emit('downloadError', obj)
            return None
        else:
            # save the downloaded content
            if obj.get('saveat') == 'upyun':
                # push the response body to UpYun cloud storage
                return UpyunUtil.uploadStream(r.content, obj)
            else:
                # otherwise write it to local disk and record the resulting path
                saveurl = writeFile(obj['savename'], obj.get('savedir'), r.content, 'wb')
                obj['saveurl'] = saveurl
                return obj
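
For orientation, here is a minimal sketch of the obj payload and gConfig entries that download() reads. The key names come from the snippet above; every value shown is an illustrative assumption, not the project's real configuration.

    # Illustrative values only; the key names match what download() accesses above.
    gConfig = {
        'downloadTimeout': 30,                  # seconds, passed to requests.get(timeout=...)
        'downloadUsingProxy': {
            'enabled': False,                   # when True, the proxies dict below is passed to requests.get()
            'proxies': {'http': 'http://127.0.0.1:8080'},
        },
    }

    obj = {
        'url': 'http://example.com/pic.jpg',    # resource to fetch
        'saveat': 'upyun',                      # any other value (or omitting the key) saves to local disk
        'savename': 'pic.jpg',                  # file name handed to writeFile() for local saves
        'savedir': 'downloads',                 # optional directory handed to writeFile()
    }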
Example #2
    def on_addHtml(self, data):
        # Log the incoming payload and add it to the module-level gUrls collection.
        logger.debug('addHtml: ' + simplejson.dumps(data))
        gUrls.add(data)
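
The names this handler touches live at module scope in the original project; the sketch below is only an assumption about what they look like, written so the handler can run in isolation.

    import logging
    import simplejson                       # assumed module-level import

    logger = logging.getLogger(__name__)    # assumed module-level logger
    gUrls = set()                           # assumption: a set-like container exposing add();
                                            # the real project may use a richer structure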
Example #3
    def scheduler(itemCount, itemSize, fileSize):
        # Report download progress: itemCount chunks of itemSize bytes out of fileSize bytes total.
        percent = 100.0 * itemCount * itemSize / fileSize
        if percent > 100:
            percent = 100
        logger.debug('percent: %.2f%%' % percent)
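
The (itemCount, itemSize, fileSize) signature matches the reporthook callback that urllib.request.urlretrieve invokes with (block count, block size, total size). Whether the original project wires it up that way is an assumption; under that assumption, usage would look like:

    # Hypothetical usage: scheduler as the progress callback of urlretrieve.
    from urllib.request import urlretrieve

    urlretrieve('http://example.com/archive.zip', 'archive.zip', reporthook=scheduler)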