class GovBuy(object): '''内蒙政府采购网''' def __init__(self): name = 'neimeng_nmgp_gov_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'application/json, text/javascript, */*; q=0.01', 'Referer': 'http://www.nmgp.gov.cn/wp-content/themes/caigou_pcweb/skin/css/css.css?ver=2.0', 'X-Requested-With': 'XMLHttpRequest', 'Connection': 'keep-alive', } self.rq = Rdis_Queue(host='localhost', dblist='neimeng_list1', dbset='neimeng_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len (self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self,result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self,pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub(r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >=2 and area_li[1] !='': return '-'.join(area_li[:2]) else: return area_li[0] def load_get(self,params): try: url = 'http://www.nmgp.gov.cn/zfcgwslave/web/index.php' response = requests.get(url=url, headers=self.headers,params=params).json() except: print('load_post error') else: if len(response) >= 1: response_li = response[0] else: return for ret_dict in response_li: if not self.rq.in_rset(ret_dict): self.rq.add_to_rset(ret_dict) self.rq.pull_to_rlist(ret_dict) def load_get_html(self,ret_dict): # print(ret_dict) if ret_dict == None: return try: ret = 
eval(ret_dict) url = 'http://www.nmgp.gov.cn/ay_post/post.php?tb_id=' + ret['ay_table_tag'] + '&p_id=' + ret['wp_mark_id'] response = requests.get(url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:'.format(e)) else: # print(ret) _id = self.hash_to_md5(url) title = ret['TITLE_ALL'] try: status = re.search(r'[\u4e00-\u9fa5]{2}公告$', title).group() except: status = '公告' # print(title) publish_date = selector.xpath('//*[@id="info-box"]/span/text()') if publish_date != []: publish_date = re.search(r'\d+年\d+月\d+日',publish_date[0]).group() else: publish_date = None # print(publish_date) # return end_date = ret['ENDDATE'] soup = BeautifulSoup(response) content_html = soup.find(id='s-main-2').div.div # print(content_html) # print(content) source = 'http://www.nmgp.gov.cn/' area_name = self.get_area('内蒙古', title) retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['publish_date'] = publish_date retult_dict['source'] = source retult_dict['area_name'] = area_name retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '内蒙古自治区政府采购网 ' retult_dict['en_name'] = 'NeiMengGu District Government Procurement' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def init(self): count = 6 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: # self.load_get_html(self.rq.get_to_rlist()) spawns = [gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count)] gevent.joinall(spawns) except Exception as e: print(e) def run(self): threading.Thread(target=self.init).start() task_li = [ # {'type_name':1, 'all_page': 5268}, # {'type_name':2, 'all_page': 735}, # {'type_name':3, 'all_page': 4482}, # {'type_name':4, 'all_page': 101}, # {'type_name':5, 'all_page': 925}, # 
{'type_name':6, 'all_page': 2386}, # {'type_name':7, 'all_page': 101}, # {'type_name':8, 'all_page': 25}, {'type_name':1, 'all_page': 2}, {'type_name':2, 'all_page': 2}, {'type_name':3, 'all_page': 2}, {'type_name':4, 'all_page': 2}, {'type_name':5, 'all_page': 2}, {'type_name':6, 'all_page': 2}, {'type_name':7, 'all_page': 2}, {'type_name':8, 'all_page': 1}, ] for task in task_li: for page in range(1,task['all_page'] + 1): params = { 'r': 'zfcgw/anndata', 'type_name': task['type_name'], 'byf_page': str(page), 'fun': 'cggg', } if self.rq.r_len() > 8000: time.sleep(3) self.load_get(params) print('第{}页'.format(page)) if self.rq.r_len() > 0: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object): '''深圳政府采购网''' def __init__(self): name = 'shenzhen_zfcg_sz_gov_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': '*/*', 'Referer': 'http://61.144.227.212/was5/web/search?page=4096^&channelid=261279^&orderby=-DOCRELTIME^&perpage=10^&outlinepage=5^&searchscope=^×cope=^×copecolumn=^&orderby=-DOCRELTIME^&chnlid=^&andsen=^&total=^&orsen=^&exclude=', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', 'Origin': 'http://61.144.227.212', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'X-Requested-With': 'XMLHttpRequest', } self.session = requests.session() self.rq = Rdis_Queue(host='localhost', dblist='shenzhen_list1', dbset='shenzhen_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self, result_dic): self.coll.saves(result_dic) self.is_running() def load_get_html(self, url): try: # print(url) response = requests.get( url=url, headers=self.headers).content.decode('gb2312') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:'.format(e)) else: title = selector.xpath( '//*[@id="content"]/div/div[2]/div/h4/text()') if title != []: title = title[0] try: status = re.search(r'[\u4e00-\u9fa5]{2}公告$', title).group() except: status = '公告' else: title = None status = '公告' _id = self.hash_to_md5(url) publish_date = selector.xpath( '//*[@id="content"]/div/div[2]/div/h6/label//text()') if publish_date != 
[]: publish_date = re.search(r'(\d+\-\d+\-\d+)', ''.join(publish_date)).group() else: publish_date = None soup = BeautifulSoup(response) content_html = soup.find(class_='main') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['publish_date'] = publish_date retult_dict['source'] = 'http://www.zfcg.sz.gov.cn/' retult_dict['area_name'] = '深圳' retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '深圳市政府采购监管网 ' retult_dict['en_name'] = 'Shenzhen Government Procurement' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self, page): try: params = ( ('page', str(page)), ('channelid', '261279'), ('orderby', ['-DOCRELTIME', '-DOCRELTIME']), ('perpage', '10'), ('outlinepage', '5'), ('searchscope', ''), ('timescope', ''), ('timescopecolumn', ''), ('chnlid', ''), ('andsen', ''), ('total', ''), ('orsen', ''), ('exclude', ''), ) data = [ ('showother', 'false'), ('showtype', 'txt'), ('classnum', '20'), ('classcol', 'CTYPE'), ('channelid', '261279'), ('orderby', '-DOCRELTIME'), ] url = 'http://61.144.227.212/was5/web/search' response = self.session.post(url=url, headers=self.headers, params=params, data=data).content.decode('utf-8') selector = etree.HTML(response) url_li = selector.xpath('//div[@class="r_list"]/dl/dd/a/@href') print('第{}页'.format(page)) except: print('load_post error') else: for url in url_li: # print(url) if not self.rq.in_rset(url): self.rq.add_to_rset(url) self.rq.pull_to_rlist(url) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: # self.load_get_html(self.rq.get_to_rlist()) spawns = [ gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count) ] gevent.joinall(spawns) except Exception as e: print(e) def run(self): threading.Thread(target=self.init).start() task_li = [ # 
{'all_page': 43879}, { 'all_page': 5 }, ] count = 3 for task in task_li: for page in range(1, task['all_page'] + 1, count): try: spawns = [ gevent.spawn(self.load_get, page + i) for i in range(count) ] gevent.joinall(spawns) except: pass if self.rq.r_len() > 0: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object): '''陕西公共资源交易信息网''' def __init__(self): name = 'shaanxi_sxggzyjy_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.sxggzyjy.cn/jydt/001001/subPage_jyxx.html', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', } # self.session = requests.session() # pq = ProxyQueue() # self.pq_run = pq.run() # self.proxy_queue = pq.proxy_queue self.rq = Rdis_Queue(host='localhost', dblist='shaanxip_list1', dbset='shaanxip_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self, result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self, pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub( r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >= 2 and area_li[1] != '': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, url): if url == None: return try: response = requests.get( url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) print(url) # self.load_get_html(url) else: title = selector.xpath('//h3[@class="article-title"]/text()') if title != 
[]: title = re.sub(r'\r|\n|\s', '', title[0]) try: status = re.search( r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$', title).group() except: status = '公告' else: title = None status = '公告' # print(title) # print(status) _id = self.hash_to_md5(url) publish_date = selector.xpath( '//div[@class="info-source"]//text()') if publish_date != []: publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})', ''.join(publish_date)).group() # publish_date = re.sub(r'\/','-',re.search(r'(\d{8}|\d{4}\/\d+\/\d{1,2})',''.join(publish_date)).group()) # if '-' not in publish_date: # publish_date = '{}-{}-{}'.format(publish_date[0:4],publish_date[4:6], publish_date[6:8]) else: publish_date = None # print(publish_date) area_name = self.get_area('陕西', title) # area_name = '四川-成都' # print(area_name) source = 'http://www.sxggzyjy.cn/' table_ele = selector.xpath('//div[@class="ewb-main"]') if table_ele != []: table_ele = table_ele[0] else: return content_html = etree.tostring(table_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '陕西省公共资源交易中心' retult_dict['en_name'] = 'Shaanxi Province Public resource' # print(retult_dict) # print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self, categoryId, types, page): try: url = 'http://www.sxggzyjy.cn/jydt/001001/{}.html'.format(page) response = requests.get( url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) self.load_get(categoryId, types, page) else: print('第{}页'.format(page)) url_li = selector.xpath('//ul[@class="ewb-list"]/li/a/@href') 
for url in url_li: urls = 'http://www.sxggzyjy.cn' + url # self.load_get_html(urls) if not self.rq.in_rset(urls): self.rq.add_to_rset(urls) self.rq.pull_to_rlist(urls) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [ gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count) ] gevent.joinall(spawns) except Exception as e: print(e) def run(self): # print(os.getppid()) threading.Thread(target=self.init).start() task_li = [ # {'categoryId':'', 'types':'','all_page': 1845}, { 'categoryId': '', 'types': '', 'all_page': 2 }, ] count = 2 for task in task_li: for page in range(1, task['all_page'] + 1, count): try: categoryId = task['categoryId'] types = task['types'] # self.load_get(categoryId, page) spawns = [ gevent.spawn(self.load_get, categoryId, types, page + i) for i in range(count) ] gevent.joinall(spawns) # print('第{}页'.format(page)) except Exception as e: print(e) if self.rq.r_len() > 0: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object): '''苏州公共资源交易信息网''' def __init__(self): name = 'suzhou_szzyjy_fwzx_suzhou_gov_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'X-DevTools-Emulate-Network-Conditions-Client-Id': '06AB3D9C05E9FDAB1EDDAD36BA60296F', 'Referer': 'http://ggzy.hefei.gov.cn/jyxx/002001/002001001/3.html', } # self.session = requests.session() # pq = ProxyQueue() # self.pq_run = pq.run() # self.proxy_queue = pq.proxy_queue self.rq = Rdis_Queue(host='localhost', dblist='suzhou_szzyjy_fwzx_suzhou_gov_cn_list1', dbset='suzhou_szzyjy_fwzx_suzhou_gov_cn_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len (self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self,result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self,pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub(r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >=2 and area_li[1] !='': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, url): if url == None: return try: # selector_div = etree.HTML(str(div)) response = requests.get(url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) # print(url) # self.load_get_html(url) else: title = selector.xpath('//h2[@class="word-title"]/text()') if title != []: title = 
re.sub(r'\r|\n|\s','',''.join(title)) try: status = re.search(r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$', title).group() except: status = '公告' else: title = None status = '公告' # print(title) # print(status) _id = self.hash_to_md5(url) publish_date = selector.xpath('//h4[@class="word-info"]//text()') if publish_date != []: # publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',''.join(publish_date)).group() publish_date = re.sub(r'\/','-',re.search(r'(\d{8}|\d{4}\/\d+\/\d{1,2})',''.join(publish_date)).group()) # if '-' not in publish_date: # publish_date = '{}-{}-{}'.format(publish_date[0:4],publish_date[4:6], publish_date[6:8]) else: publish_date = None print(publish_date, title) # area_name = self.get_area() area_name = '江苏-苏州' source = 'http://szzyjy.fwzx.suzhou.gov.cn' table_ele = selector.xpath('//div[@class="border"]') if table_ele != []: table_ele = table_ele[0] else: return content_html = etree.tostring(table_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '苏州市公共资源交易中心' retult_dict['en_name'] = 'Suzhou City Public resource' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self,categoryId, types, page): try: params = ( ('paging', page), ) url = 'http://szzyjy.fwzx.suzhou.gov.cn/Front/jyzx/{}/'.format(types) response = requests.get(url=url, headers=self.headers, params=params).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) # time.sleep(3) # self.load_get(categoryId, types, page) else: print('第{}页'.format(page)) # div_ele_li = 
selector.xpath('//ul[@class="ewb-right-item"]/li') try: url_li = selector.xpath('//*[@class="mr-content"]/div[1]/table/tr/td[1]/a/@href') except: time.sleep(3) self.load_get(categoryId, types, page) # for div_ele in div_ele_li: for url in url_li: # div = etree.tostring(div_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') urls = 'http://szzyjy.fwzx.suzhou.gov.cn' + url # self.load_get_html(urls) if not self.rq.in_rset(urls): self.rq.add_to_rset(urls) self.rq.pull_to_rlist(urls) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count)] gevent.joinall(spawns) except Exception as e: print(e) def run(self): # print(os.getppid()) threading.Thread(target=self.init).start() task_li = [ {'categoryId':'', 'types':'002004/002004001','all_page': 2}, {'categoryId':'', 'types':'002004/002004002','all_page': 2}, {'categoryId':'', 'types':'002004/002004003','all_page': 2}, {'categoryId':'', 'types':'002004/002004004','all_page': 1}, {'categoryId':'', 'types':'002004/002004005','all_page': 2}, {'categoryId':'', 'types':'002004/002004006','all_page': 1}, ] count = 1 for task in task_li: for page in range(1, task['all_page'] + 1, count): try: categoryId = task['categoryId'] types = task['types'] # self.load_get(categoryId, page) spawns = [gevent.spawn(self.load_get, categoryId, types, page + i) for i in range(count)] gevent.joinall(spawns) # print('第{}页'.format(page)) except Exception as e: print(e) if self.rq.r_len() > 10: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object): '''山西采购电子商城''' def __init__(self): name = 'shanxi_sxzfcg_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.sxzfcg.cn/view.php?nav=61', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', } # self.session = requests.session() # pq = ProxyQueue() # self.pq_run = pq.run() # self.proxy_queue = pq.proxy_queue self.rq = Rdis_Queue(host='localhost', dblist='shanxi_sxzfcg_cn_list1', dbset='shanxi_sxzfcg_cn_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self, result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self, pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub( r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >= 2 and area_li[1] != '': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, url): if url == None: return try: response = requests.get( url=url, headers=self.headers).content.decode('utf-8') print(url) selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) # print(url) # self.load_get_html(url) else: # print(url) title = 
selector.xpath('//div[@valign="middle"]/h2/text()') if title != []: title = re.sub(r'\r|\n|\s', '', ''.join(title)) try: status = re.search( r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$', title).group() except: status = '公告' else: title = None status = '公告' _id = self.hash_to_md5(url) publish_date = selector.xpath('//td[@bgcolor="#E6E6E6"]//text()') if publish_date != []: # publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',''.join(publish_date)).group() publish_date = re.sub( r'年|月', '-', re.search(r'(\d{8}|\d{4}年\d+月\d{1,2})', ''.join(publish_date)).group()) # if '-' not in publish_date: # publish_date = '{}-{}-{}'.format(publish_date[0:4],publish_date[4:6], publish_date[6:8]) else: publish_date = None # print(publish_date, title) area_name = '山西' # print(area_name) source = 'http://www.sxzfcg.cn/' # print(url) # print(response) table_ele = selector.xpath('//td[@class="c_pt"]/table/tr[2]') if table_ele != []: table_ele = table_ele[0] else: return content_html = etree.tostring(table_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '山西省省级政府采购中心' retult_dict['en_name'] = 'Shanxi Government Procurement Center' print(publish_date) # print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self, categoryId, types, page): try: params = ( ('nav', types), ('page', page), ) url = 'http://www.sxzfcg.cn/view.php' response = requests.get(url=url, headers=self.headers, params=params).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) # time.sleep(3) # self.load_get(categoryId, types, page) else: 
print('第{}页'.format(page)) # div_ele_li = selector.xpath('//tbody[@id="bulletininfotable_table_body"]/tr') url_li = selector.xpath('//tr[@class="odd"]/td/a/@href') # for div_ele in div_ele_li: for url in url_li: urls = 'http://www.sxzfcg.cn/{}'.format(url) if not self.rq.in_rset(urls): self.rq.add_to_rset(urls) self.rq.pull_to_rlist(urls) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [ gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count) ] gevent.joinall(spawns) except Exception as e: print(e) def run(self): threading.Thread(target=self.init).start() flag = 1 task_li = [ { 'categoryId': '', 'types': '61', 'all_page': flag }, { 'categoryId': '', 'types': '62', 'all_page': flag }, { 'categoryId': '', 'types': '63', 'all_page': flag }, { 'categoryId': '', 'types': '64', 'all_page': flag }, { 'categoryId': '', 'types': '65', 'all_page': flag }, { 'categoryId': '', 'types': '66', 'all_page': flag }, { 'categoryId': '', 'types': '67', 'all_page': flag }, { 'categoryId': '', 'types': '68', 'all_page': flag }, { 'categoryId': '', 'types': '69', 'all_page': flag }, ] count = 2 for task in task_li: for page in range(1, task['all_page'] + 1, count): try: categoryId = task['categoryId'] types = task['types'] spawns = [ gevent.spawn(self.load_get, categoryId, types, page + i) for i in range(count) ] gevent.joinall(spawns) # print('第{}页'.format(page)) except Exception as e: print(e) if self.rq.r_len() > 10: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object): '''江苏政府采购网''' def __init__(self): name = 'jiangsu_ccgp-jiangsu_gov_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/index_1.html', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', } self.session = requests.session() self.rq = Rdis_Queue(host='localhost', dblist='jiangsu_list1', dbset='jiangsu_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self, result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self, pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub( r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >= 2 and area_li[1] != '': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, url): if url == None: return try: response = requests.get( url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) else: title = selector.xpath('//div[@class="dtit"]/h1/text()') if title != []: title = re.sub(r'\r|\n|\s', '', title[0]) try: status = re.search(r'[\u4e00-\u9fa5]{2}公告$', title).group() except: 
status = '公告' else: title = None status = '公告' # print(title) # print(status) _id = self.hash_to_md5(url) publish_date = selector.xpath( '//div[@class="detail_bz"]/span/text()') if publish_date != []: publish_date = re.search(r'(\d{4}\-\d+\-\d+)', ''.join(publish_date)).group() else: publish_date = None # print(publish_date) area_name = self.get_area('江苏', title) # print(area_name) source = 'http://www.ccgp-jiangsu.gov.cn/' table = selector.xpath('//div[@class="detail"]') if table != []: table = table[0] else: return content_html = etree.tostring(table, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') # print(content_html) retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '江苏政府采购网' retult_dict['en_name'] = 'Jiangsu Government Procurement' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self, base_url, page): try: if page == 0: url = base_url else: url = base_url + 'index_' + str(page) + '.html' response = requests.get( url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except: print('load_post error') # self.load_get(url) else: # print('第{}页'.format(page)) url_li = selector.xpath('//div[@class="list_list"]/ul/li/a/@href') if url_li == []: url_li = selector.xpath( '//div[@class="list_list02"]/ul/li/a/@href') for url in url_li: urls = base_url + url.replace('./', '') # print(urls) # self.load_get_html((urls)) if not self.rq.in_rset(urls): self.rq.add_to_rset(urls) self.rq.pull_to_rlist(urls) def init(self): count = 8 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [ gevent.spawn(self.load_get_html, 
self.rq.get_to_rlist()) for i in range(count) ] gevent.joinall(spawns) except Exception as e: print(e) def run(self): threading.Thread(target=self.init).start() flag = 2 task_li = [ { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cgyg/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/htgg/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/xqyj/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/ysgg/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/shengji/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/suzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/nanjing/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/wuxi/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/changzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/zhenjiang/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/nantong/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/yangzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/yancheng/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/huaian/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/suqian/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/lianyungang/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cggg/xuzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/shengji/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/suzhou/', 'all_page': 
flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/nanjing/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/wuxi/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/changzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/zhenjiang/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/nantong/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/taizhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/yangzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/yancheng/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/huaian/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/suqian/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/lianyungang/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/gzgg/xuzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/shengji/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/suzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/nanjing/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/wuxi/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/changzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/zhenjiang/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/nantong/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/taizhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/yangzhou/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/yancheng/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/huaian/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/suqian/', 'all_page': flag }, { 'url': 
'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/lianyungang/', 'all_page': flag }, { 'url': 'http://www.ccgp-jiangsu.gov.cn/cgxx/cjgg/xuzhou/', 'all_page': flag }, ] count = 3 for task in task_li: for page in range(0, task['all_page'] + 1, count): try: base_url = task['url'] # self.load_get(base_url, page) spawns = [ gevent.spawn(self.load_get, base_url, page + i) for i in range(count) ] gevent.joinall(spawns) print('第{}页'.format(page)) except Exception as e: print(e) def main(self): self.run()
class GovBuy(object):
    """Spider for the Chongqing government procurement site (www.cqgp.gov.cn).

    A producer thread pages a JSON list API for notice ids and queues them in
    Redis; a consumer loop fetches each notice's JSON detail and stores a
    normalized record in MongoDB.
    """

    def __init__(self):
        name = 'chongqing_cqgp_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Referer': 'https://www.cqgp.gov.cn/notices/list?source=41,42^&area=^%^E9^%^87^%^8D^%^E5^%^BA^%^86^%^E5^%^B8^%^82^&purches=^%^E9^%^87^%^87^%^E8^%^B4^%^AD^%^E5^%^85^%^AC^%^E5^%^91^%^8A',
            'Connection': 'keep-alive',
        }
        self.session = requests.session()
        self.rq = Rdis_Queue(host='localhost', dblist='chongqing_list1',
                             dbset='chongqing_set1')

    def is_running(self):
        """Return False once the Redis work list is drained and ids were seen."""
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        """MD5 hex digest of *sign_str*; used as the MongoDB ``_id``."""
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        """Current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record and refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort '省-市' label derived from *strs* via cpca ``transform``.

        Falls back to *pro* when no area is recognized; returns None when the
        lookup itself fails (e.g. cpca unavailable or odd input).
        """
        try:
            df = transform([strs], umap={})
            # NOTE: '/r|/n' was probably meant as '\r|\n', but '\s' already
            # strips those characters, so the pattern is kept as-is.
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get_html(self, pid, retries=3):
        """Fetch one notice's JSON detail by *pid* and store it.

        Network failures are retried at most *retries* times; the original
        code recursed without bound on every failure.
        """
        if pid is None:
            return
        try:
            proxies = proxy_pool.proxies()
            url = 'https://www.cqgp.gov.cn/gwebsite/api/v1/notices/stable/{}'.format(pid)
            response = requests.get(url=url, headers=self.headers,
                                    proxies=proxies, timeout=10).json()
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
            if retries > 0:  # bounded retry instead of unbounded recursion
                self.load_get_html(pid, retries - 1)
        else:
            notice = response['notice']
            title = notice['title']
            try:
                status = notice['projectPurchaseWayName']
            except Exception:
                status = '公告'
            _id = self.hash_to_md5(url)
            # issueTime is a string; the original compared it to [] (always
            # true).  Test truthiness and guard the regex match instead.
            publish_date = notice['issueTime']
            if publish_date:
                date_match = re.search(r'(\d{4}\-\d+\-\d{1,2})', publish_date)
                publish_date = date_match.group() if date_match else None
            else:
                publish_date = None
            area_name = '重庆'
            source = 'https://www.cqgp.gov.cn/'
            content_html = notice['html']
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '重庆市政府采购网'
            retult_dict['en_name'] = 'Chongqing City Government Procurement'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, page, retries=3):
        """Fetch one list page of notice ids and queue unseen ids in Redis."""
        try:
            params = (
                ('pi', page),
                ('ps', '20'),
                ('timestamp', str(int(time.time() * 1000))),
            )
            proxies = proxy_pool.proxies()
            url = 'https://www.cqgp.gov.cn/gwebsite/api/v1/notices/stable'
            response = requests.get(url=url, headers=self.headers,
                                    params=params, proxies=proxies,
                                    timeout=5).json()
        except Exception as e:
            print('load_get error:{}'.format(e))
            if retries > 0:  # bounded retry instead of unbounded recursion
                self.load_get(page, retries - 1)
        else:
            print('第{}页'.format(page))
            for data_dict in response['notices']:
                pid = data_dict['id']
                # de-duplicate through the Redis set before queueing
                if not self.rq.in_rset(pid):
                    self.rq.add_to_rset(pid)
                    self.rq.pull_to_rlist(pid)

    def init(self):
        """Consumer loop: drain the Redis list in small gevent batches."""
        count = 2
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1  # shrink the batch when the queue is nearly empty
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Producer: start the consumer thread, then walk the list pages."""
        threading.Thread(target=self.init).start()
        task_li = [
            # {'all_page': 18647},
            {
                'all_page': 3
            },
        ]
        count = 2
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    spawns = [
                        gevent.spawn(self.load_get, page + i)
                        for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Shanghai Minhang public-resource trading site
    (ztb.shmh.gov.cn).

    A producer thread walks static list pages per channel and queues detail
    URLs in Redis; a consumer loop fetches each detail page (GB18030) and
    stores a normalized record in MongoDB.
    """

    def __init__(self):
        name = 'shanghai_ztb_shmh_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'X-DevTools-Emulate-Network-Conditions-Client-Id': 'C30FE2988AF840A005E144C01A1874D4',
            'Referer': 'http://ztb.shmh.gov.cn/mhztb_site/html/shmhztb_subject/shmhztb_subject_zfcg_cggg/List/list_350.htm',
        }
        self.rq = Rdis_Queue(host='localhost',
                             dblist='shanghai_ztb_shmh_gov_cn_list1',
                             dbset='shanghai_ztb_shmh_gov_cn_set1')

    def is_running(self):
        """Return False once the Redis work list is drained and ids were seen."""
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        """MD5 hex digest of *sign_str*; used as the MongoDB ``_id``."""
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        """Current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record and refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort '省-市' label derived from *strs* via cpca ``transform``.

        Falls back to *pro* when no area is recognized; returns None when the
        lookup itself fails.
        """
        try:
            df = transform([strs], umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get_html(self, url):
        """Fetch one GB18030 detail page and store a normalized record."""
        if url is None:
            return
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('gb18030')
            selector = etree.HTML(response)
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
        else:
            title = selector.xpath('//div[@class="title"]/h2/text()')
            if title != []:
                title = re.sub(r'\r|\n|\s', '', ''.join(title))
                # The original pattern ["招标","中标",...]{1,2}公告$ was a
                # character class (matched arbitrary pairs of those
                # characters); use real alternation for the notice type.
                status_match = re.search(
                    r'(招标|中标|预|采购|更正|结果|补充|询价)公告$', title)
                status = status_match.group() if status_match else '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath('//div[@class="title"]/h3//text()')
            if publish_date != []:
                # guard .group(): an unmatched date no longer raises
                date_match = re.search(r'(\d{4}\-\d+\-\d{1,2})',
                                       ''.join(publish_date))
                publish_date = date_match.group() if date_match else None
            else:
                publish_date = None
            area_name = '上海'
            source = 'http://ztb.shmh.gov.cn/'
            table_ele = selector.xpath('//div[@class="list_right"]')
            if table_ele != []:
                table_ele = table_ele[0]
            else:
                return  # no recognizable content block on this page
            content_html = etree.tostring(table_ele,
                                          encoding="utf-8",
                                          pretty_print=True,
                                          method="html").decode('utf-8')
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '上海市闵行区公共资源交易网'
            retult_dict['en_name'] = 'Minhang District Public resource'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, categoryId, types, page):
        """Fetch one list page for channel *types* and queue detail URLs.

        *categoryId* is currently unused but kept for interface parity with
        the task definitions in :meth:`run`.
        """
        try:
            url = 'http://ztb.shmh.gov.cn/mhztb_site/html/shmhztb_subject/{}/List/list_{}.htm'.format(
                types, page)
            response = requests.get(
                url=url, headers=self.headers).content.decode('gb18030')
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get error:{}'.format(e))
        else:
            print('第{}页'.format(page))
            url_li = selector.xpath('//ul[@id="list_ul"]/li/a/@href')
            for href in url_li:  # renamed from `url` to avoid shadowing
                urls = 'http://ztb.shmh.gov.cn/mhztb_site/html/shmhztb_subject/' + re.sub(
                    '\.\.\/\.\.\/', '', href)
                if not self.rq.in_rset(urls):
                    self.rq.add_to_rset(urls)
                    self.rq.pull_to_rlist(urls)

    def init(self):
        """Consumer loop: drain the Redis list in small gevent batches."""
        count = 2
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Producer: start the consumer thread, then walk every channel."""
        threading.Thread(target=self.init).start()
        # NOTE(review): 'shmhztb_subject_ggzy_cjxx' appears twice below in the
        # original task list; kept as-is to preserve behavior.
        task_li = [
            {'categoryId': '', 'types': 'shmhztb_subject_zfcg_cggg', 'all_page': 2},
            {'categoryId': '', 'types': 'shmhztb_subject_zfcg_jggg', 'all_page': 2},
            {'categoryId': '', 'types': 'shmhztb_subject_zfcg_dyly', 'all_page': 1},
            {'categoryId': '', 'types': 'shmhztb_subject_jsgc_zbxx', 'all_page': 2},
            {'categoryId': '', 'types': 'shmhztb_subject_jsgc_zgbxx', 'all_page': 1},
            {'categoryId': '', 'types': 'shmhztb_subject_ggzy_jyxx', 'all_page': 1},
            {'categoryId': '', 'types': 'shmhztb_subject_ggzy_cjxx', 'all_page': 1},
            {'categoryId': '', 'types': 'shmhztb_subject_ggzy_cjxx', 'all_page': 1},
        ]
        count = 2
        for task in task_li:
            for page in range(0, task['all_page'] + 1, count):
                try:
                    categoryId = task['categoryId']
                    types = task['types']
                    spawns = [
                        gevent.spawn(self.load_get, categoryId, types, page + i)
                        for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        # re-kick the consumer in case the first thread exited early
        if self.rq.r_len() > 10:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Guangxi government procurement site (www.gxzfcg.gov.cn).

    A producer posts a search form per page and queues detail URLs in Redis;
    a consumer loop fetches each detail page and stores a normalized record
    in MongoDB.
    """

    def __init__(self):
        name = 'guangxi_gxzfcg_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Origin': 'http://www.gxzfcg.gov.cn',
            'Upgrade-Insecure-Requests': '1',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Referer': 'http://www.gxzfcg.gov.cn/CmsNewsController/search/chnlCodes-/distin-/beginDate-0/endDate-0/p-20/c-3/0-0.html',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'X-Requested-With': 'XMLHttpRequest',
        }
        self.session = requests.session()
        self.rq = Rdis_Queue(host='localhost', dblist='guangxi_list1',
                             dbset='guangxi_set1')

    def is_running(self):
        """Return False once the Redis work list is drained and ids were seen."""
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        """MD5 hex digest of *sign_str*; used as the MongoDB ``_id``."""
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        """Current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record and refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort '省-市' label derived from *strs* via cpca ``transform``.

        Falls back to *pro* when no area is recognized; returns None when the
        lookup itself fails.
        """
        try:
            df = transform([strs], umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get(self, url):
        """POST the (empty) search form to *url* and queue detail URLs."""
        try:
            data = [
                ('searchKey', ''),
                ('title', ''),
                ('str1', 'undefined'),
                ('str2', 'undefined'),
                ('cmsNews.title', ''),
                ('cmsNews.buyerName', ''),
                ('cmsNews.str2', ''),
                ('cmsNews.str3', ''),
                ('cmsNews.str1', ''),
                ('cmsNews.str5', ''),
                ('cmsNews.str6', ''),
                ('cmsNews.str8', ''),
                ('cmsNews.agentName', ''),
                ('cmsNews.startPubdate', ''),
                ('cmsNews.endPubdate', ''),
            ]
            response = self.session.post(
                url=url, headers=self.headers,
                data=data).content.decode('utf-8')
            selector = etree.HTML(response)
            url_li = selector.xpath(
                '//*[@id="channelBody"]/div[2]/ul/li/a/@href')
        except Exception:
            print('load_post error')
        else:
            for href in url_li:  # renamed to avoid shadowing the parameter
                detail_url = 'http://www.gxzfcg.gov.cn' + href
                if not self.rq.in_rset(detail_url):
                    self.rq.add_to_rset(detail_url)
                    self.rq.pull_to_rlist(detail_url)

    def load_get_html(self, url):
        """Fetch one detail page and store a normalized record."""
        try:
            response = self.session.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            # original message was '...error:'.format(e) and dropped the error
            print('laod_get_html error:{}'.format(e))
        else:
            title = selector.xpath(
                '//*[@id="bodyMain"]/div/div/div[2]/div[2]/div[1]/h1/text()')
            if title != []:
                title = title[0]
                status_match = re.search(r'[\u4e00-\u9fa5]{2}公告$', title)
                status = status_match.group() if status_match else '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath(
                '//*[@id="bodyMain"]/div/div/div[2]/div[2]/div[1]/span//text()'
            )
            if publish_date != []:
                date_match = re.search(r'(\d+\-\d+\-\d+)',
                                       ''.join(publish_date))
                publish_date = date_match.group() if date_match else None
            else:
                publish_date = None
            soup = BeautifulSoup(response, 'lxml')
            content_html = soup.find(class_='frameReport')
            if content_html is None:
                # deliberate: surface pages without the expected body so the
                # caller's except/print sees them
                raise EOFError
            source = 'http://www.gxzfcg.gov.cn/'
            area_name = self.get_area('广西', title)
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['publish_date'] = publish_date
            retult_dict['source'] = source
            retult_dict['area_name'] = area_name
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '广西壮族自治区政府采购网 '
            retult_dict[
                'en_name'] = 'Guangxi Zhuang National Government Procurement'
            self.save_to_mongo(retult_dict)

    def init(self):
        """Consumer loop: drain the Redis list in small gevent batches."""
        count = 3
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Producer: start the consumer thread, then walk the search pages."""
        threading.Thread(target=self.init).start()
        task_li = [
            # {'all_page': 49876},
            {
                'all_page': 5
            },
        ]
        for task in task_li:
            for page in range(1, task['all_page'] + 1):
                url = 'http://www.gxzfcg.gov.cn/CmsNewsController/search/chnlCodes-/distin-/beginDate-0/endDate-0/p-20/c-' + str(
                    page) + '/0-0.html'
                self.load_get(url)
                print('第{}页'.format(page))
        # re-kick the consumer in case the first thread exited early
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Xizang government procurement site
    (www.ccgp-xizang.gov.cn).

    A producer posts per-category list pages and queues detail URLs in Redis;
    a consumer loop fetches each detail page and stores a normalized record
    in MongoDB.
    """

    def __init__(self):
        name = 'xizang_ccgp-xizang_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Origin': 'http://www.ccgp-xizang.gov.cn',
            'Upgrade-Insecure-Requests': '1',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'http://www.ccgp-xizang.gov.cn/shopHome/morePolicyNews.action?categoryId=124',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
        }
        self.rq = Rdis_Queue(host='localhost', dblist='xizang_list1',
                             dbset='xizang_set1')

    def is_running(self):
        """Return False once the Redis work list is drained and ids were seen."""
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        """MD5 hex digest of *sign_str*; used as the MongoDB ``_id``."""
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        """Current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record and refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort '省-市' label derived from *strs* via cpca ``transform``.

        Falls back to *pro* when no area is recognized; returns None when the
        lookup itself fails.
        """
        try:
            df = transform([strs], umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get_html(self, url, retries=3):
        """Fetch one detail page and store a normalized record.

        Network failures are retried at most *retries* times; the original
        code recursed without bound on every failure.
        """
        if url is None:
            return
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
            if retries > 0:  # bounded retry instead of unbounded recursion
                self.load_get_html(url, retries - 1)
        else:
            title = selector.xpath('//h2[@class="sd"]/font/text()')
            if title != []:
                title = re.sub(r'\r|\n|\s', '', title[0])
                status_match = re.search(r'[\u4e00-\u9fa5]{2}公告$', title)
                status = status_match.group() if status_match else '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath('//h3[@class="wzxq"]/text()')
            if publish_date != []:
                date_match = re.search(r'(\d{4}\-\d+\-\d{1,2})',
                                       ''.join(publish_date))
                publish_date = date_match.group() if date_match else None
            else:
                publish_date = None
            print(publish_date, title)
            area_name = self.get_area('西藏', title)
            source = 'http://www.ccgp-xizang.gov.cn/'
            table_ele = selector.xpath('//div[@class="neirong"]')
            if table_ele != []:
                table_ele = table_ele[0]
            else:
                return  # no recognizable content block on this page
            content_html = etree.tostring(table_ele,
                                          encoding="utf-8",
                                          pretty_print=True,
                                          method="html").decode('utf-8')
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '西藏自治区政府采购网'
            retult_dict['en_name'] = 'Xizang Government Procurement'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, categoryId, page, retries=3):
        """POST one list page of *categoryId* and queue detail URLs."""
        try:
            params = {'categoryId': categoryId}
            data = {'currentPage': str(page)}
            url = 'http://www.ccgp-xizang.gov.cn/shopHome/morePolicyNews.action'
            response = requests.post(url=url,
                                     headers=self.headers,
                                     params=params,
                                     data=data).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get error:{}'.format(e))
            if retries > 0:  # bounded retry instead of unbounded recursion
                self.load_get(categoryId, page, retries - 1)
        else:
            print('第{}页'.format(page))
            url_li = selector.xpath(
                '//div[@id="news_div"]/ul/li/div[1]/a/@href')
            for href in url_li:
                urls = 'http://www.ccgp-xizang.gov.cn' + href
                if not self.rq.in_rset(urls):
                    self.rq.add_to_rset(urls)
                    self.rq.pull_to_rlist(urls)

    def init(self):
        """Consumer loop: drain the Redis list in small gevent batches."""
        count = 2
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Producer: start the consumer thread, then walk both categories."""
        threading.Thread(target=self.init).start()
        task_li = [
            {
                'categoryId': '124',
                'all_page': 2
            },
            {
                'categoryId': '125',
                'all_page': 2
            },
        ]
        count = 2
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    categoryId = task['categoryId']
                    spawns = [
                        gevent.spawn(self.load_get, categoryId, page + i)
                        for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        # re-kick the consumer in case the first thread exited early
        if self.rq.r_len() > 10:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Urumqi public-resource trading site (ggzy.wlmq.gov.cn).

    A producer posts DWR batch calls per category to harvest FILE_IDs and
    queues them in Redis; a consumer loop fetches each detail page (GB18030)
    and stores a normalized record in MongoDB.
    """

    def __init__(self):
        name = 'wulumuqi_ggzy_wlmq_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Origin': 'http://ggzy.wlmq.gov.cn',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Content-Type': 'text/plain',
            'Accept': '*/*',
            'Referer': 'http://ggzy.wlmq.gov.cn/generalpage.do?method=showList&fileType=201605-048&faname=201605-046',
            'Connection': 'keep-alive',
        }
        self.rq = Rdis_Queue(host='localhost', dblist='wulumuqi_list1',
                             dbset='wulumuqi_set1')

    def is_running(self):
        """Return False once the Redis work list is drained and ids were seen."""
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        """MD5 hex digest of *sign_str*; used as the MongoDB ``_id``."""
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        """Current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record and refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort '省-市' label derived from *strs* via cpca ``transform``.

        Falls back to *pro* when no area is recognized; returns None when the
        lookup itself fails.
        """
        try:
            df = transform([strs], umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get_html(self, info_id):
        """Fetch the detail page for *info_id* and store a normalized record."""
        if info_id is None:
            return
        try:
            url = 'http://ggzy.wlmq.gov.cn/infopublish.do?method=infoPublishView&infoid=' + info_id
            response = requests.get(
                url=url, headers=self.headers).content.decode('gb18030')
            selector = etree.HTML(response)
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
            print(url)
        else:
            title = selector.xpath('//div[@class="title"]/text()')
            # BUG FIX: the original tested `title != ''` (a list against a
            # string, always true) and then indexed title[0], which raised
            # IndexError on an empty xpath result.
            if title:
                title = re.sub(r'\r|\n|\s', '', title[0])
                # The original pattern ["招标","中标",...]{1,2}公告$ was a
                # character class; use real alternation for the notice type.
                status_match = re.search(
                    r'(招标|中标|预|采购|更正|结果|补充|询价)公告$', title)
                status = status_match.group() if status_match else '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath('//td[@class="td_name"]//text()')
            if publish_date != []:
                date_match = re.search(r'(\d{8}|\d{4}\-\d+\-\d{1,2})',
                                       ''.join(publish_date))
                publish_date = date_match.group() if date_match else None
            else:
                publish_date = None
            area_name = '新疆-乌鲁木齐'
            source = 'http://ggzy.wlmq.gov.cn/'
            table_ele = selector.xpath('//div[@class="w_content_main"]')
            if table_ele != []:
                table_ele = table_ele[0]
            else:
                return  # no recognizable content block on this page
            content_html = etree.tostring(table_ele,
                                          encoding="utf-8",
                                          pretty_print=True,
                                          method="html").decode('utf-8')
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '乌鲁木齐市公共资源交易网'
            retult_dict['en_name'] = 'Urumqi City Public resource'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, categoryId, types, page, retries=3):
        """POST one DWR batch call for *categoryId*/*page* and queue FILE_IDs.

        Network failures are retried at most *retries* times; the original
        code recursed without bound on every failure.
        """
        try:
            data = 'callCount=1\n\npage=/generalpage.do?method=showList&fileType=' + categoryId + '&faname=201605-046\n\nhttpSessionId=\n\nscriptSessionId=A0890501B5665F11F1222EBC440FC5FC644\n\nc0-scriptName=projectDWR\n\nc0-methodName=queryItemInfoByIndustryType2\n\nc0-id=0\n\nc0-e1=string:packTable\n\nc0-e2=string:' + categoryId + '\n\nc0-e3=number:' + str(page) + '\n\nc0-e4=string:15\n\nc0-e5=string:true\n\nc0-e6=string:packTable\n\nc0-e7=string:982\n\nc0-param0=Object_Object:{flag:reference:c0-e1, name:reference:c0-e2, currentPage:reference:c0-e3, pageSize:reference:c0-e4, isPage:reference:c0-e5, tabId:reference:c0-e6, totalRows:reference:c0-e7}\n\nbatchId=3\n\n'
            url = 'http://ggzy.wlmq.gov.cn/dwr/call/plaincall/projectDWR.queryItemInfoByIndustryType2.dwr'
            response = requests.post(url=url, headers=self.headers,
                                     data=data).content.decode('utf-8')
            selector = etree.HTML(response)  # kept from original; result unused
        except Exception as e:
            print('load_get error:{}'.format(e))
            if retries > 0:  # bounded retry instead of unbounded recursion
                self.load_get(categoryId, types, page, retries - 1)
        else:
            print('第{}页'.format(page))
            info_id_il = re.findall(r"""\[\'FILE_ID\'\]\=\"(.*?)\"\;""",
                                    response)
            print(info_id_il)
            for pid in info_id_il:
                if not self.rq.in_rset(pid):
                    self.rq.add_to_rset(pid)
                    self.rq.pull_to_rlist(pid)

    def init(self):
        """Consumer loop: drain the Redis list in small gevent batches."""
        count = 1
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Producer: start the consumer thread, then walk every category."""
        threading.Thread(target=self.init).start()
        task_li = [
            {'categoryId': '201605-048', 'types': '', 'all_page': 2},
            {'categoryId': '201605-049', 'types': '', 'all_page': 1},
            {'categoryId': '201605-050', 'types': '', 'all_page': 2},
            {'categoryId': '201605-051', 'types': '', 'all_page': 1},
            {'categoryId': '201605-052', 'types': '', 'all_page': 1},
            {'categoryId': '201605-053', 'types': '', 'all_page': 1},
            {'categoryId': '201605-039', 'types': '', 'all_page': 2},
            {'categoryId': '201605-041', 'types': '', 'all_page': 1},
            {'categoryId': '201605-042', 'types': '', 'all_page': 1},
            {'categoryId': '201605-043', 'types': '', 'all_page': 2},
            {'categoryId': '201605-044', 'types': '', 'all_page': 2},
            {'categoryId': '201605-045', 'types': '', 'all_page': 2},
        ]
        count = 3
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    categoryId = task['categoryId']
                    types = task['types']
                    spawns = [
                        gevent.spawn(self.load_get, categoryId, types, page + i)
                        for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Hebei government procurement site
    (www.ccgp-hebei.gov.cn / search.hebcz.gov.cn).

    A producer queries the search backend per channel/region and queues
    detail URLs in Redis; a consumer loop fetches each detail page and stores
    a normalized record in MongoDB.
    """

    def __init__(self):
        name = 'hebei_ccgp-hebei_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'If-None-Match': '594gpnM6qpxwGpEvFYoNJpzY8YE=',
            'If-Modified-Since': 'Mon, 23 Jul 2018 02:32:18 GMT',
            'Referer': 'http://www.ccgp-hebei.gov.cn/province/cggg/zhbgg/index_3.html',
            'X-DevTools-Emulate-Network-Conditions-Client-Id': 'F24524FAD50B25DB7D7D89DBCEA53767',
            'Intervention': '<https://www.chromestatus.com/feature/5718547946799104>; level=warning',
        }
        self.session = requests.session()
        self.rq = Rdis_Queue(host='localhost', dblist='hebei_list1',
                             dbset='hebei_set1')

    def is_running(self):
        """Return False once the Redis work list is drained and ids were seen."""
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        """MD5 hex digest of *sign_str*; used as the MongoDB ``_id``."""
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        """Current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record and refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort '省-市' label derived from *strs* via cpca ``transform``.

        Falls back to *pro* when no area is recognized; returns None when the
        lookup itself fails.
        """
        try:
            df = transform([strs], umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get_html(self, url):
        """Fetch one detail page and store a normalized record."""
        if url is None:
            return
        try:
            response = self.session.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
        else:
            title = selector.xpath('//span[@class="txt2"]/text()')
            if title != []:
                title = title[0]
                status_match = re.search(r'[\u4e00-\u9fa5]{2}公告$', title)
                status = status_match.group() if status_match else '公告'
            else:
                title = None
                status = '公告'
            publish_date = selector.xpath(
                '//body/table/tr/td/table/tr[4]/td/table/tr[7]/td/span/text()')
            if publish_date != []:
                publish_date = re.sub(r'\r|\n|\s|发布时间:', '',
                                      publish_date[0])
            else:
                publish_date = None
            soup = BeautifulSoup(response, 'lxml')
            # NOTE(review): assumes a fixed nested-table layout; raises
            # AttributeError (caught by the gevent caller) when it changes.
            content_html = soup.find('body').table.tr.td.table
            area_name = self.get_area('河北', title)
            source = 'http://www.ccgp-hebei.gov.cn/province/'
            _id = self.hash_to_md5(url)
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '河北省政府采购网'
            retult_dict['en_name'] = 'Hebei Province Government Procurement'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, params):
        """Query the search backend with *params* and queue detail URLs."""
        try:
            url = 'http://search.hebcz.gov.cn:8080/was5/web/search'
            response = self.session.get(url=url,
                                        headers=self.headers,
                                        params=params).text
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get error:{}'.format(e))
        else:
            url_li = selector.xpath('//tr[@id="biaoti"]/td[2]/a/@href')
            for href in url_li:
                if not self.rq.in_rset(href):
                    self.rq.add_to_rset(href)
                    self.rq.pull_to_rlist(href)

    def init(self):
        """Consumer loop: drain the Redis list in gevent batches.

        BUG FIX: the original started at count=1 and *raised* it to 10 when
        the queue was nearly empty — inverted relative to every sibling
        spider.  Start wide and shrink as the queue drains instead.
        """
        count = 10
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Producer: start the consumer thread, then walk every channel/region."""
        threading.Thread(target=self.init).start()
        count = 2
        task_li = [
            {'lanmu': 'zhbgg', 'code': 130000000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130000000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130181000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130181000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130200000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130200000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130300000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130300000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130400000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130400000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130500000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130500000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130600000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130600000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130682000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130682000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130700000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130700000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130800000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130800000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 130900000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 130900000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 131000000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 131000000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 131100000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 131100000, 'all_page': count},
            {'lanmu': 'zhbgg', 'code': 139900000, 'all_page': count},
            {'lanmu': 'zbgg', 'code': 139900000, 'all_page': count},
        ]
        count = 1
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                params = {
                    'page': str(page),
                    'channelid': '228483',
                    'perpage': '50',
                    'outlinepage': '10',
                    'lanmu': task['lanmu'],
                    'admindivcode': task['code'],
                }
                try:
                    self.load_get(params)
                except Exception as e:
                    print(e)
                print('第{}页'.format(page))
        # re-kick the consumer in case the first thread exited early
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Shanxi government procurement site (ccgp-shanxi.gov.cn).

    ``run`` walks the listing pages per channel (``nav``) and queues detail
    URLs into Redis; ``init`` workers drain the queue with gevent, parse each
    notice page and persist the record to MongoDB.
    """

    def __init__(self):
        name = 'shanxi_ccgp-shanxi_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
            'Referer': 'http://www.ccgp-shanxi.gov.cn/view.php?nav=104',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
        }
        # dblist = pending detail URLs, dbset = dedup set of seen URLs.
        self.rq = Rdis_Queue(host='localhost', dblist='shanxi_list1',
                             dbset='shanxi_set1')

    def is_running(self):
        """Return False once the pending list is drained after work started."""
        is_runing = True
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        else:
            return is_runing

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the Mongo ``_id``."""
        m = hashlib.md5()
        sign_str = sign_str.encode('utf-8')
        m.update(sign_str)
        sign = m.hexdigest()
        return sign

    def now_time(self):
        """Current local time formatted ``YYYY-MM-DD HH:MM:SS``."""
        time_stamp = datetime.datetime.now()
        return time_stamp.strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort ``province-city`` extraction from a notice title.

        Falls back to *pro* when nothing is recognized; implicitly returns
        None when the area lookup itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except:
            pass
        else:
            if area_str == '':
                area_li = [pro]
            else:
                area_li = (area_str.split('-'))
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            else:
                return area_li[0]

    def load_get(self, params):
        """Fetch one listing page and queue every new detail URL on it."""
        try:
            url = 'http://www.ccgp-shanxi.gov.cn/view.php'
            # NOTE: the site expects a POST with query-string params here.
            response = requests.post(url=url, headers=self.headers,
                                     params=params).content.decode('utf-8')
            selector = etree.HTML(response)
            url_li = selector.xpath(
                '//*[@id="node_list"]/tbody/tr/td[1]/a/@href')
        except:
            print('load_post error')
        else:
            if url_li != []:
                for url in url_li:
                    url = 'http://www.ccgp-shanxi.gov.cn/' + url
                    if not self.rq.in_rset(url):
                        self.rq.add_to_rset(url)
                        self.rq.pull_to_rlist(url)

    def load_get_html(self, url):
        """Fetch one notice page, parse the fields and persist the record."""
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except:
            print('laod_get_html error')
        else:
            _id = self.hash_to_md5(url)
            title = selector.xpath(
                '//tr[@class="bk5"]/td/table/tr/td/table/tr/td/div/h2/text()')
            if title != []:
                title = title[0]
                try:
                    status = re.search(r'[\u4e00-\u9fa5]{2}公告$',
                                       title).group()
                except:
                    status = '公告'
            else:
                title = None
                status = '公告'
            publish_date = selector.xpath(
                '//tr[@class="bk5"]/td/table/tr[2]/td//text()')
            if publish_date != []:
                # BUGFIX: re.search returns a Match or None, never a list;
                # the old "!= []" test let None through and crashed on
                # subscripting.  Check for a failed match explicitly.
                date_match = re.search(r'(\d+年\d+月\d+日)', publish_date[2])
                if date_match is not None:
                    publish_date = date_match.group(1)
                else:
                    publish_date = None
            else:
                publish_date = None
            soup = BeautifulSoup(response)
            content_html = soup.find(class_='bk5')
            source = 'http://www.ccgp-shanxi.gov.cn/'
            area_name = self.get_area('山西', title)
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['publish_date'] = publish_date
            retult_dict['source'] = source
            retult_dict['area_name'] = area_name
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '中国山西政府采购'
            retult_dict['en_name'] = 'Shanxi Government Procurement'
            # BUGFIX: this call had been left commented out, so parsed
            # notices were fetched and then silently discarded; every
            # sibling spider persists its results here.
            self.save_to_mongo(retult_dict)

    def init(self):
        """Worker loop: drain the Redis list in small gevent batches."""
        count = 6
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                # Guard added for consistency with the sibling spiders so a
                # single bad batch does not kill the worker thread.
                print(e)

    def run(self):
        threading.Thread(target=self.init).start()
        # Full historical depth per channel (kept for reference):
        # nav=100 -> 14705 pages, nav=104 -> 13667, nav=105 -> 2291,
        # nav=116 -> 747, nav=131 -> 249, nav=132 -> 1, nav=153 -> 7279.
        task_li = [
            {'nav': 100, 'end_page': 4, 'status': '招标公告'},
            {'nav': 104, 'end_page': 3, 'status': '结果公告'},
            {'nav': 105, 'end_page': 2, 'status': '变更公告'},
            {'nav': 116, 'end_page': 2, 'status': '单一来源公告'},
            {'nav': 131, 'end_page': 1, 'status': '招标预公告'},
            {'nav': 132, 'end_page': 1, 'status': '邀请公告'},
            {'nav': 153, 'end_page': 1, 'status': '合同公告'},
        ]
        for task in task_li:
            for page in range(1, task['end_page'] + 1):
                params = {
                    'app': '',
                    'type': '',
                    'nav': task['nav'],
                    'page': str(page)
                }
                self.load_get(params)
                print('第{}页'.format(page))

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Tianjin government procurement portal (www.tjgp.gov.cn).

    ``run`` POSTs the listing endpoint per channel id and queues detail URLs
    into Redis; ``init`` workers drain the queue with gevent, parse each
    notice page and persist the record to MongoDB.
    """

    def __init__(self):
        name = 'tianjin_city_gov_buy'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Origin': 'http://www.tjgp.gov.cn',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': '*/*',
            'Referer': 'http://www.tjgp.gov.cn/portal/topicView.do?method=view^&view=Infor^&id=1665^&ver=2^&st=1^&stmp=1532324224291',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive',
        }
        # dblist = pending detail URLs, dbset = dedup set of seen URLs.
        self.rq = Rdis_Queue(host='localhost', dblist='tianjin_list1',
                             dbset='tianjin_set1')

    def is_running(self):
        """Return False once the pending list is drained after work started."""
        is_runing = True
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        else:
            return is_runing

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the Mongo ``_id``."""
        m = hashlib.md5()
        sign_str = sign_str.encode('utf-8')
        m.update(sign_str)
        sign = m.hexdigest()
        return sign

    def now_time(self):
        """Current local time formatted ``YYYY-MM-DD HH:MM:SS``."""
        time_stamp = datetime.datetime.now()
        return time_stamp.strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort ``province-city`` extraction from a notice title.

        Falls back to *pro* when nothing is recognized; implicitly returns
        None when the area lookup itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except:
            pass
        else:
            if area_str == '':
                area_li = [pro]
            else:
                area_li = (area_str.split('-'))
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            else:
                return area_li[0]

    def load_post(self, data):
        """POST one listing request and queue every new detail URL found."""
        try:
            response = requests.post(
                'http://www.tjgp.gov.cn/portal/topicView.do',
                headers=self.headers,
                data=data).content.decode('utf-8')
            selector = etree.HTML(response)
        except:
            print('load_post error')
        else:
            url_li = selector.xpath('//*[@id="reflshPage"]/ul/li/a/@href')
            if url_li != []:
                for url in url_li:
                    url = 'http://www.tjgp.gov.cn' + url
                    if not self.rq.in_rset(url):
                        self.rq.add_to_rset(url)
                        self.rq.pull_to_rlist(url)

    def load_get_html(self, url):
        """Fetch one notice page, parse the fields and persist the record."""
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except:
            print('laod_get_html error')
        else:
            _id = self.hash_to_md5(url)
            title = selector.xpath(
                '//body/table/tbody/tr/td/div/p[1]/font/b/text()')
            if title != []:
                title = title[0]
                try:
                    status = re.search(r'[\u4e00-\u9fa5]{2}公告$',
                                       title).group()
                except:
                    status = '公告'
            else:
                title = None
                status = '公告'
            publish_date = selector.xpath(
                '//body/table/tbody/tr/td/div/p[3]/text()')
            if publish_date != []:
                publish_date = publish_date[0]
            else:
                publish_date = None
            source = 'http://www.tjgp.gov.cn/'
            area_name = self.get_area('', title)
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['publish_date'] = publish_date
            retult_dict['source'] = source
            retult_dict['area_name'] = area_name
            # Stored for schema consistency with the sibling spiders, which
            # all record the notice's detail URL.
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(response)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '天津市政府采购网'
            retult_dict['en_name'] = 'Tianjin government Procurement'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def init(self):
        """Worker loop: drain the Redis list in small gevent batches."""
        count = 6
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            spawns = [
                gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                for i in range(count)
            ]
            gevent.joinall(spawns)

    def run(self):
        threading.Thread(target=self.init).start()
        count = 5
        # One entry per portal channel id.  BUGFIX: the original list held
        # '1664' twice, so that channel's pages were scraped twice per run.
        task_li = [
            {'id': '1665', 'end_page': count},
            {'id': '1664', 'end_page': count},
            {'id': '1666', 'end_page': count},
            {'id': '2013', 'end_page': count},
            {'id': '2014', 'end_page': count},
            {'id': '2015', 'end_page': count},
            {'id': '2016', 'end_page': count},
        ]
        for task in task_li:
            for page in range(1, task['end_page'] + 1):
                data = [
                    ('method', 'view'),
                    ('page', str(page)),
                    ('id', task['id']),
                    ('step', '1'),
                    ('view', 'Infor'),
                    ('st', '1'),
                    ('ldateQGE', ''),
                    ('ldateQLE', ''),
                ]
                self.load_post(data)
                print('第{}页'.format(page))
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Guangzhou government procurement platform
    (gzg2b.gzfinance.gov.cn).

    ``run`` queries the JSON listing endpoint and queues each row (a dict
    repr string) into Redis; ``init`` workers parse each row back, fetch the
    notice page and persist the record to MongoDB.
    """

    def __init__(self):
        name = 'guangzhou_gzg2b_gzfinance_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Origin': 'http://gzg2b.gzfinance.gov.cn',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': '*/*',
            'Referer': 'http://gzg2b.gzfinance.gov.cn/gzgpimp/portalindex.do?method=goInfogsgg^&linkId=gsgg',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive',
        }
        # dblist = pending rows, dbset = dedup set of seen rows.
        self.rq = Rdis_Queue(host='localhost', dblist='guangzhou_list1',
                             dbset='guangzhou_set1')

    def is_running(self):
        """Return False once the pending list is drained after work started."""
        is_runing = True
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        else:
            return is_runing

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the Mongo ``_id``."""
        m = hashlib.md5()
        sign_str = sign_str.encode('utf-8')
        m.update(sign_str)
        sign = m.hexdigest()
        return sign

    def now_time(self):
        """Current local time formatted ``YYYY-MM-DD HH:MM:SS``."""
        time_stamp = datetime.datetime.now()
        return time_stamp.strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort ``province-city`` extraction from a notice title.

        Falls back to *pro* when nothing is recognized; implicitly returns
        None when the area lookup itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except:
            pass
        else:
            if area_str == '':
                area_li = [pro]
            else:
                area_li = (area_str.split('-'))
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            else:
                return area_li[0]

    def load_get(self, data):
        """Query one listing page (JSON) and queue every new row."""
        try:
            url = 'http://gzg2b.gzfinance.gov.cn/gzgpimp/portalsys/portal.do'
            params = (
                ('method', 'queryHomepageList'),
                ('t_k', 'null'),
            )
            response = requests.post(url=url, headers=self.headers,
                                     params=params, data=data).json()
        except:
            print('load_post error')
        else:
            response_li = response['rows']
            for ret_dict in response_li:
                if not self.rq.in_rset(ret_dict):
                    self.rq.add_to_rset(ret_dict)
                    self.rq.pull_to_rlist(ret_dict)

    def load_get_html(self, ret_dict):
        """Parse one queued row, fetch the notice page and persist it.

        *ret_dict* is the string repr of a row dict queued by ``load_get``.
        """
        if ret_dict is None:
            return
        # Local import keeps the fix self-contained.
        import ast
        try:
            # SECURITY FIX: the row string comes back from Redis; parse it
            # with ast.literal_eval instead of eval(), which would execute
            # arbitrary code if the queue were ever tampered with.
            ret = ast.literal_eval(ret_dict)
            url = ('http://gzg2b.gzfinance.gov.cn/gzgpimp/portalsys/portal.do'
                   '?method=pubinfoView&&info_id=' + ret['info_id']
                   + '&t_k=null')
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            # BUGFIX: the format placeholder was missing, so the exception
            # text was never printed.
            print('laod_get_html error:{}'.format(e))
        else:
            _id = self.hash_to_md5(url)
            title = ret['title']
            status = ret['info_key']
            publish_date = ret['finish_day']
            soup = BeautifulSoup(response)
            content_html = soup.find(class_='row').div
            source = 'http://gzg2b.gzfinance.gov.cn/'
            area_name = self.get_area('广州', title)
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['publish_date'] = publish_date
            retult_dict['source'] = source
            retult_dict['area_name'] = area_name
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '广州市政府采购平台 '
            retult_dict[
                'en_name'] = 'Guangzhou Government Procurement Platform'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def init(self):
        """Worker loop: drain the Redis list in small gevent batches."""
        count = 6
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        threading.Thread(target=self.init).start()
        # Full historical depth: 329 pages.
        task_li = [
            {'all_page': 5},
        ]
        for task in task_li:
            for page in range(1, task['all_page'] + 1):
                data = [
                    ('current', str(page)),
                    ('rowCount', '10'),
                    ('searchPhrase', ''),
                    ('title_name', ''),
                    ('porid', 'zbcggg'),
                    ('kwd', ''),
                ]
                self.load_get(data)
                print('第{}页'.format(page))
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """Spider for the Shaanxi government procurement site
    (www.ccgp-shaanxi.gov.cn).

    ``run`` POSTs the listing frame per notice type and queues detail URLs
    into Redis; ``init`` workers drain the queue with gevent, parse each
    notice page and persist the record to MongoDB.
    """

    def __init__(self):
        name = 'shaanxi_ccgp-shaanxi_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Origin': 'http://www.ccgp-shaanxi.gov.cn',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            # NOTE(review): hard-coded proxy credentials committed in source;
            # move to configuration/secret storage.
            'Proxy-Authorization': 'Basic MTYzOTY2MzE2ODphamxhNTJ0bQ==',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': 'text/html, */*; q=0.01',
            'Referer': 'http://www.ccgp-shaanxi.gov.cn/notice/list.do?noticetype=3&index=3&province=province',
            'X-Requested-With': 'XMLHttpRequest',
            'Proxy-Connection': 'keep-alive',
        }
        self.session = requests.session()
        # dblist = pending detail URLs, dbset = dedup set of seen URLs.
        self.rq = Rdis_Queue(host='localhost', dblist='shaanxi_list1',
                             dbset='shaanxi_set1')

    def is_running(self):
        """Return False once the pending list is drained after work started."""
        is_runing = True
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        else:
            return is_runing

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the Mongo ``_id``."""
        m = hashlib.md5()
        sign_str = sign_str.encode('utf-8')
        m.update(sign_str)
        sign = m.hexdigest()
        return sign

    def now_time(self):
        """Current local time formatted ``YYYY-MM-DD HH:MM:SS``."""
        time_stamp = datetime.datetime.now()
        return time_stamp.strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort ``province-city`` extraction from a notice title.

        Falls back to *pro* when nothing is recognized; implicitly returns
        None when the area lookup itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except:
            pass
        else:
            if area_str == '':
                area_li = [pro]
            else:
                area_li = (area_str.split('-'))
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            else:
                return area_li[0]

    def load_get_html(self, url):
        """Fetch one notice page, parse the fields and persist the record."""
        if url is None:
            return
        try:
            # NOTE(review): verify=False disables TLS certificate checks.
            response = requests.get(url=url, headers=self.headers,
                                    verify=False).content.decode("utf-8")
            selector = etree.HTML(response)
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
        else:
            title = selector.xpath('//h1[@class="content-tit"]/text()')
            if title != []:
                title = re.sub(r'\r|\n|\s', '', title[0])
                try:
                    status = re.search(r'[\u4e00-\u9fa5]{2}公告$',
                                       title).group()
                except:
                    status = '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath(
                '//div[@class="content_about"]/span[2]/em/text()')
            if publish_date != []:
                # BUGFIX: re.search may return None when no date is present;
                # calling .group() directly raised AttributeError.
                date_match = re.search(r'(\d{4}\-\d+\-\d{1,2})',
                                       ''.join(publish_date))
                if date_match is not None:
                    publish_date = date_match.group()
                else:
                    publish_date = None
            else:
                publish_date = None
            area_name = self.get_area('陝西', title)
            source = 'http://www.ccgp-shaanxi.gov.cn/'
            table_ele = selector.xpath('//div[@class="contain detail-con"]')
            if table_ele != []:
                table_ele = table_ele[0]
            else:
                # No recognizable content container: skip this notice.
                return
            content_html = etree.tostring(table_ele, encoding="utf-8",
                                          pretty_print=True,
                                          method="html").decode('utf-8')
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '陕西省政府采购网'
            retult_dict['en_name'] = 'Shaanxi Province Government Procurement'
            self.save_to_mongo(retult_dict)

    def load_get(self, noticetype, page):
        """POST one listing-frame page and queue every new detail URL."""
        try:
            params = (
                ('noticetype', noticetype),
            )
            data = {
                "parameters['purcatalogguid']": "",
                "page.pageNum": page,
                "parameters['title']": "",
                "parameters['startdate']": "",
                "parameters['enddate']": "",
                # 610001 = province-wide region filter.  TODO confirm.
                "parameters['regionguid']": 610001,
                "parameters['projectcode']": "",
                "province": "",
                "parameters['purmethod']": "",
            }
            url = 'http://www.ccgp-shaanxi.gov.cn/notice/noticeaframe.do'
            response = requests.post(url=url, headers=self.headers,
                                     params=params, data=data,
                                     verify=False).text
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get error:{}'.format(e))
        else:
            print('第{}页'.format(page))
            url_li = selector.xpath(
                '//div[@class="list-box"]/table/tbody/tr/td[3]/a/@href')
            for url in url_li:
                if not self.rq.in_rset(url):
                    self.rq.add_to_rset(url)
                    self.rq.pull_to_rlist(url)

    def init(self):
        """Worker loop: drain the Redis list in small gevent batches."""
        count = 3
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        threading.Thread(target=self.init).start()
        task_li = [
            {'noticetype': '3', 'all_page': 2},
            {'noticetype': '5', 'all_page': 2},
            {'noticetype': '4', 'all_page': 2},
            {'noticetype': '6', 'all_page': 2},
            {'noticetype': '99', 'all_page': 1},
            {'noticetype': '1', 'all_page': 1},
        ]
        count = 2
        for task in task_li:
            # Fetch listing pages two at a time per notice type.
            for page in range(1, task['all_page'] + 1, count):
                try:
                    noticetype = task['noticetype']
                    spawns = [
                        gevent.spawn(self.load_get, noticetype, page + i)
                        for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object): '''广西公共资源交易信息网''' def __init__(self): name = 'guangxi_gxzbtb_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.cookies = { 'ASP.NET_SessionId': 'trbofu0uet0aywbdhr35s0x4', '__CSRFCOOKIE': '6f7e275f-5762-4569-8ea2-ae98d3b0379d', } self.headers = { 'Connection': 'keep-alive', 'Cache-Control': 'max-age=0', 'Origin': 'http://www.gxzbtb.cn', 'Upgrade-Insecure-Requests': '1', 'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.gxzbtb.cn/gxzbw/jyxx/001010/001010001/MoreInfo.aspx?CategoryNum=001010001', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', } # self.session = requests.session() # pq = ProxyQueue() # self.pq_run = pq.run() # self.proxy_queue = pq.proxy_queue self.rq = Rdis_Queue(host='localhost', dblist='guangxi_gxzbtb_cn_list1', dbset='guangxi_gxzbtb_cn_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self, result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self, pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub( r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >= 2 and area_li[1] != '': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, url): if url == None: return try: # 
selector_div = etree.HTML(str(div)) response = requests.get( url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) # print(url) # self.load_get_html(url) else: # print(url) title = selector.xpath('//td[@id="tdTitle"]/font//text()') if title != []: title = re.sub(r'\r|\n|\s', '', ''.join(title)) try: status = re.search( r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$', title).group() except: status = '公告' else: title = None status = '公告' _id = self.hash_to_md5(url) publish_date = selector.xpath( '//td[@id="tdTitle"]/font[2]//text()') if publish_date != []: # publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',''.join(publish_date)).group() publish_date = re.sub( r'\/', '-', re.search(r'(\d{8}|\d{4}\/\d+\/\d{1,2})', ''.join(publish_date)).group()) # if '-' not in publish_date: # publish_date = '{}-{}-{}'.format(publish_date[0:4],publish_date[4:6], publish_date[6:8]) else: publish_date = None # print(publish_date, title) # area_name = self.get_area('', title) area_name = '广西' # print(area_name) source = 'http://www.gxzbtb.cn/' table_ele = selector.xpath('//table[@id="tblInfo"]') if table_ele != []: table_ele = table_ele[0] else: return content_html = etree.tostring(table_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '广西壮族自治区公共资源交易中心' retult_dict['en_name'] = 'Guangxi Zhuang National Public Resources' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self, categoryId, types, page): try: params = (('CategoryNum', types), ) data = 
{ '__CSRFTOKEN': '/ wEFJDZmN2UyNzVmLTU3NjItNDU2OS04ZWEyLWFlOThkM2IwMzc5ZA ==', '__VIEWSTATE': 'z6UhCTu3jqnsz47aHWA7gSWW/wG9YleyN9akSy8SDfrTIhkXv/8D27JYdKJG/ZWKPqb0smc7bS8/xKHcu0vIwbRrxU6DQIlkQJ3m97wtYMFtK8KpjZwIdMqSgnw1q3DjBu9vEraO4xfqzJacAXSCukutXa8BPCyLevI3U1naYHFUSSNqQhNo9cICs8Kqr8n8HNpvSRjqJB8CTAWoGlc8x6IeC/j50VdUETRudT9/w6Xc0To0rsY/yH+VbMRbAzIFlzNvQP/dmUPEdjMSRkvyULU6ZIgal19QWLJXJSGioQKJ3StzC9BqsgyaCobteQoKLs8/h20aMOCs2YO/oSpUVr0AaapTqrGIMhrM/IaPn8N00monNce5uU1fWffkBK6zL4mJijgPTmuyCrA3/AUY5La8VvP1v2NUScoKAdjRaPypTDlh1+ZUt3x7ZdwcmWd7bwgAK42uneSLZWSC7Er0k9VcuPZTR4E/I8fbEzJWf4Bf9pI2hs5actOcnD4ETfu5m/dgfq1mgz4uTrYIRcqE1xOmE3WDJNircnYO4xVTI49MDYfgDcGtjWRiBZHd788/5abVt7h9sXkzXPHboi7zVv4haS8yZsIEeQG3F8MFVtM7H9+/Kbss3uPC5I5V/dDw54S2zejVmbAx9dU68wJfL1+c28EbvOUOWgOE6dCuFmTS3nSiBLMjwUeRtKwhvP1RA5MsKD4WI8JtqR545DULfQz0XJCh1PlO+Nd5L661UuspC/PvgWRoNQEoaVpLJK1S9UuPAdNnAqXdMuJdZZPu60+Jdig7zOBSEGbmwNvmXB0vphagqkqxf1nruFS0OGP/D7OJcbcObzotOwp1GpMmBdqg6hiDh2nccyFZ+E4DUv5NssGK4Zj7dY2jhMBv8bvkIwaY+uLYMLJwJ0DqhNyP2JTKv/FmENt0pjfytL0EU2HRmLTcPgJWgdQ2IZ7GZCYhkSzBfkkJOmVx7t+UZervSy+hZBsVsaz0DpKZ9JMVXfOYVzQNZt+VmcmIn9feEEJH6F8t4gYlC1pvrfcFcBVI8ndumFTtsYjnHhD7gMG8q64rCOoD0DAB0si2OdOndIUczT0RlhLkpqa9RA1nQ9kj75RJGe/dm4NGfCqqUHcRJTNbixZUPdA2pJNzYhRAMLQBqGmFANV+tvqB2yDiJg86H6ouBO7v2+SXxkp70ZBxv8CiAHw2kKEWoxfqmnMi552GiJRSrpOhcw3ylrYA3dINyJjtDJ9ZNYxLGWD5Vzu170wePz/foMZ6o2+8zWgEBc3PDx2l3UTG1TUwt8dbQbokscyKFWtCoo/qs9kkZS1KYBZ2NKe8K3EswLU3d5pHJsUtFhe2QtHhMolkwM3LTzBIIBl1QyPS6HDnCasCFHNbGX2/k/TMPLEBrOpdsRo1YhhhNMYz8pwQbwTxo4WRsmmQBvPUkTSlk04Iasp9Tm8/2WO/NIFs0Y/h0BvdanXJHwP8blNEMZCd5qmP02LdYGygy6hs2uU79m+VM6wtHIbYxkC2AKuDkErEqaOuQCNNiQfyP6e0oaZwNcWQOOaQDWsz9/F12QCDwx3X2ihIaG2v1YCQHKC/lfnBQ8o9Q9LvrLuZ4yjVbaO3B1eM0Q46zuTbT9KbhLwkFm8LH/2JM/OOvvUDNN7A4z42REh3kZWC0eXOyCDD1HvNdOFxluC6GRtEdv6/31i/PKLPr7te5VCIGCFjF9oCxquF9+2ecYtlmqcvbEvCKnPx1JDO1BloI0i8TqwjDmqyaORTTYzrtJwnXjKiX+8TKDC9yyOMXA1qbTt/KZPTpHI1R0P9qQ7Pk8AwKeL1y5g68OMHUqOsSyJuo6SNety/xymBke5m1FG2zE9M7OOqSGtV6NCKXNoSgi3laUmbAMZ8x+GOQnXrfpMGvtPUPIUY9
zEvUiEDeKKQSnqlxf5LwEWyWlEuzSjO4+8nIGVC9nUb/YEIM5o2wiC1lMXl2d2tDQ/Mau5M3B6qmSLx5QP3nfjDKsoFqN0tQxlo/TBCKLXAUOHM8zTrEBY/xkb6tvnijW+leCYPSKURnheByCjFWSPnlz6C8tiktma/JzVph5blcc3thpmHiGp87enSqKQkjIf8RJkVeM+ENhg0gAndrokhhPBiS+MTNCuX2zXimlH2dpTY7JKu4uSyltVswpG2mLWFGegTeKLsBjVks8je/eeJvAaevVRh9mNOD4Cj8jh6/6taR2ee0/EjYlDIkrCNstLNBcQ35u7NQOesHpN9j7Zxf1iSsz1ChY/fS3w+3AVg7hnZA6yr1pUa54NWQEakrAgjpNUTzdTkSfyLkGmdqSXZEady89XXYBKDfF6rkDa8hb19ujrWQZn9m9K22OeAw1k3w8wl29I2LMno86bezhCDhZRVa2RrsbsYAtJ+TMnEdWUuINhSrEbe9zRRga2N4BJv+eopnSClJYNNgkMiNVEOdWnfDVa9Wb9iqVRYfjBKfZRv8g4/tlMr6ygKYPBRprLiv1VQT9M+5hkhLWgtGeyOzTGxfiZG6QHnqSL2g/A+nu5Ij3fGoVDEPPj3Adcqk6AUrcY+XaJxR9wVVz927mFfFq5kxjo12Sw2ak1pS7faIy7o9Fk7Y9XKh1qu35ltABHEqiVIeb/dymZ7oLV+AClQeLbbmciJ7NKrdzTwRxanqOirpiPl5MnJtQxROEbt6lYeRG1RzEUsKMlp/L5v2aBRnkVWC4odd6FafVJw1NFDAhtVrI3uGta566tdsuT+FYaXOtELa/hUjBES+jWAJ0+qrDVff6ilka90N5wpQ55cjCwAs1VtaLa6b/zuin4h6+wfwtJnEGBfXND+1AQSbrveJHojhedFjPAYsSG988yhO0A1+TdQWGoJQmlEINiELipfNz/CUCbHENz431cxEjZV6No6qEXLUVXcbXp0BRB8sOZWtmbJ5LaLzS+unRSRN9RMk/80ct6AuINtSE2MCwrBpkrB3DhkebVRwWxxODsfGOj20j5pVpeI8jF75k/9igiTP/+3+N20FTsoJ/fVXevJ2YTUHIrJZc2j3bNDZ6LuHcJbEjS5DQat9WGeZa2FzDRba3ikBTxMevju8T9I2s19yFeztg72WQTcyDhN0I/TryQNcqZq67e8ScokSwQ1pE95EkIBdxk+7J9IIm1KHGp7P1T6PmxBqSyCyJT53AJgQxbhG2N+2NCpIk0ZfKA9Apvg/UfBFli/pa42N1XCdVnLwWW9wOY+vSbuo9Fnf91wTW1SrH1cZCrcWDFzJTlB703WUdA97ZyWuRMwypjXj5RGpTRi1R/maM3DwIcC6ktl+aczr8jK94UVPZ2iNVmgk/Ml92vly8vycYSTkHvFCHmw0gzSyhBjaCDSEL80nw4T4XjrrNfohWQRYDnk+isTfbfmpt6KRz8yIczndwTZdSN5rYigqeAJMd9DAxm28DcGCUk1nOyeASMtByfmPDd/jp6ihDR8Uj10eaty7X0LyjvB3Ol4kjvNucSPwJhwe6PCULDCMKKM9EQFTs0UiiyAhA/1N52njX2EpWDLOnT8yfMMDfDOwdwex/3DVo22nYjzTArBjbjJ4N6RtPW0rrWXJNJFHpm6ZSUTFZXgtZw+wAvBxRWiuXsvQqUYS4a25rN1/8aIaKxV9rxhSTZzF7l9K5S0wvjF1+kwarDs/M5SQT8pZtdEnySC5tgn057VgiCpEHbCWYm16zWPv7ARLsRV8D21nmMoYAJqJ5jZZMcrVTMuutYG7zc7W2rmjt2Nto/enbDGWgBeyMCsCPPA6+VYvOXWV6JTCwwCUQ//+LH4z1Kokk02ObYuNfwh0x4ilnU6JYM9t65ExOl7shHpKQUHrXwtwDi49hZNTD78s3yPOJYa5E9delhUSFFCAqH5/AxgSFKMOJXyBgsQlntLLWlYGCUabX61ClQuf3flI
Q80RBZKlwA6qTpW3dS4EcgCP4beaujMVq/ifreAkY3hGwZwbdXViux7rLJTdj188Bim8KVbCYfIwWWoin8Nsi/rZiPorqikSMdyEw9VoWtIMz6/PNeJY5mh68hzeCGFKEIRNDPy+wMlMbh1Q1vzj1RTQa7sMAaDrq99gx3oc+CXHZKpbVwPOk/HwjJ6JM90TNrZdBIL0+PW98LgriR5FuqoUFp4DUHMSW0YjZDqj+MUq9OMFhOCFUTzg53NkBlgvKdzzr8Afve7xL9pXCcvXdRPxCHW78Hj1cJn/zmOe19RissiNTqUS5ArxaCeiD3IEmVKJboz2B2E7kp+mwpjCvx0IJ9HUUGJiBeP2ayo9SGOxZPfKVZ3hLV5Yrk2kyOagPI9ZA7kNzCRQO0+cgObPKve9kqANbcB7CxIWP7yVTTMGN2hHwzK731hA4nU7VXT2af6fO1/A42/DHaqmLqgBNBij6ihMW+xtOUmfJ9Fft/+9fTMps9rvznPluGxp4LwmLiugk9OEg+5qzJMzpec/zYFU0L3GWPiMJpcrBgO4uZ9Sl+beLk11GzbrFgcL+3Uhb7dzgxZvAaE4kHPbx2W4VDJGCuXdiTTlPZFwV2KTE2k7U37bP7IvgRDSu18ZjXqS0ckwDqd/jbXwmc84FLEo73rs9D050kmeYREx9c/GJHs6bR5bTIKkrCorEXJ+I1LNItiyYpgQ0fCsutxe91UwVLh4IV1l+jmjQoOeY99vzYmqJ/mv1FbWuqTSFZzHOIJmxpY3hSHGsnjh3fTlCwp2vb7OI2OcS4hdPfm/wUwiMoO4o0+MEEIZq4s2/243WkXxnQv4x8eGJkbBvhlhKgNOoNxwB5wgAAnhXkH3PH08VS1skVudmUwMNChQMwKnQr44CUMhYsmy3PXftAeLMBvTjSAngfdupJU6mV6hQHcioY+uk3cq0AfBtDdRKa/ANMFXNFt45zbANxG2wtfbaGLKmSETIPxshs5KupcFM+E0ikl+/iO8sLV7tbIqPmgzKG4kuovGfVw/Io8Z+ol113M9419oCHr8M9LZcqOw1HbQcCC2hDQCyW9aiCEryPZyUN0c84vCukRQACb0YeTBu8Hl693+QJd0KVAJ8c05wTRa0xBjdsTdZ2jVGdSez42wtoI/ZaMsjcOFKrjaeMuzH5ZWNJROiawSaucbQfRtrfvIXBDaOacqMEIFp3qU9wlzUYAAJhhHp0I2DeM4moOILIdIS0hflR4p2MLF8VR9TO6sy3qaQ+omHxh4mWqVin/PqYKElWtTbxMOCM5U5sxJHVw+MsnD9lcqpWRyunuYDGtMdDLOXHUxRsoqk7O0X3gB2Pta+ffxXL5yNMsQAMBqzvlO/x6N6gWQxkySjqMwrj+oeKs/uVWuSbxvnsGAkR1k4XobilSn8pN4Tws3cnNH848CYCoLrOEIXGQOFfm5IqLBami3ECbfrxZOnlctJ2O2FMMtM4oKK889EbGznvm76A2lOEmgIMhPDFsNwca6AJRIP+AbZVafFTK/pjG/DQR+Onj5x1ArfG7xkX1GcgsKqlPk1XC+SyBa1Q0/BE+lvrYD7/ozLSA9t87Gsm0/+fFpWr7+Dx7dKA1qQfhE5TU+uhAn5iz6m/4mcH1JTKhW2EZVdLI34Fg8MVPBHDoGwcnYGw54D9UT1dHjUdYKXDmkECVg9t/fGLNAryddSE8gwBmGQPQBCg8ACFDG1Vz7pz4DwtIHtc+vs8Q0tjuCRut2S7fexj9jEXaUHUaUiY9yMHL6g/3X9/7WsxsQ9BVauhusCPC4WjsKkFny/W7felQcWbX9OJ/73kRA78BuG1yWPN1xEkZFe9IWQhMCCOKZ+xXJs7IBi4bsctunx/TyWznFXi5mUtVyLgEG1JAG/7MvLXxyJrg2RhViCrMv/zWdjxuaGL1oPA2JINl9QnSsWFMYJwsUFy93HIP2KIILJzam7R0Q23+Xj0ioiO9tFl5PGAlLJEMhRVREnayraf5PKcmA
YJsJNguyoTJhfyFCsC0kA74a9S7YwXiBnr7SLHNuVvBACVyvcSqGsVb/hXDDwzdW+UTXiklYnH5U7POZNSkXq539j+FG71Ndxsxz906PmTb/ZU6d3X1Zlm583SRB8VzYf5qCXrHJCK7d98zytr9XKoUH1rIItoqalLp67udBMEOqRrdiG5GYV/P117dunqKt8cVryDjUuiFfkNNRSSBknnFEVuIXdeFOo/tgfX6AqU5sDmjajo88fRSOnnDkAK0YazroIwporIjp8QxTCv+HLLpt1FsQWnxI7gc1hNaUnzCkTuoTTwLzIAKzJ5iWfgJvu2voRLFZ7crFe8gJ5eCZ3x3O6uvxvkhit0XYFsuPL2A7b4agWGb+fXNbdccCoKVo1mZjI5EX6medskd6mcEEKscxBWb8skl4azvlcA8v4l58nkVF3P6puR3nR+nMlT+igLAEttSfIO4aKH2ry5R4D14InwrKbURhOZOiLmilVjqtTZJ1gI/pLg9F7d4FLpG0qINV+srl1aC56zfI4MkXjroArUE85yO/XmgrqWHS+PIFlUZyAEk7tb0HcK5J/vAt0MGEsya6QGk2+6nBi2QDEdcykbe9GKcJSv4JKlzjngqjh5yjz1PY2Ui4QsuAQYfVpLOJXFtsVyXxl9OnWNAkIcyRdjR+UyJUqeMrJmvCGZvyDkr0heFp+W0XN1aW6fOlB4wURO6wvvmT+f2cOR8e8oRW3UdURs+UPBVQSkaU8fHDFecHkfvruVuN0JhKFDGijcGQEicA2sSHfgSrzv38aOwCUmxsPaSIdLqYlz+Q+GzPkMFQpkQt43C1yLaEh8FSOkixOV/P2Y3q5PsII3yfgdHf6aTGAy3OPK7eWc4Yo/avmsj21hPcJDoJk3iMYGQE/kGwueljbGLkESjROGcbJOe7qwavRbM5Ok+TgKmR1kEeKJ7rU3UWh9Ttz+oBd+SZUXzbphYUvPLH1GLR0J8qW41Yv6WmL7Zg5XMYw6OmCWInmkSCQPoTUEhrkagnscZ7OFpdls0QE7tFTHKmzXU66cAD86BZofRkBTdYI0bk61VLr6hXV81YSBQTBVZu8FkAYYfI40l7FHDi/3fNQQ6vGGlSCz5ULlF4QEeBA5rzPBkzpcK22e+bl6YBOnnpx3N7edak3Auc96oGVFabec8QM3CUI4G98rt3A/OGQw9iu6P8WFfbuBQnCtva4pFCrJorA/6QXda247/pRL7ov5lMMc1qqLrYzxLgTUoYs0CCgIosEhucryWseQ9c5KzY+r0pChkUkKhkmXUxMqO6+5pFZ/ef1Oy4KXQYUMR+RU/obNSHyyB+L70Sw/xJCeGy9d9bCjMmkDL0t9elhkn0unvzObirMrHPh4h4FXYx6rxyfqdcz8w7KsElalaFk4PIQKupZTp+UayvTCKNPwLuaEXQr5tXccra5niBnN+TAWRzWKXefACVlF1xiVE3mhbH/M6gdTYp/Pj6fxWoP7pQG5lolcJsn84BG8yt2DYJUknDNBw992dolm7mpFWDbFySsKcyZfXTl9qxNUTG8ge19reYz+pNZANlWEQf2tG+StIiFZVZkj/X9DQECCuvK1aCPfb7jop14pPtOC9iNIjBG2/MvwoiqsDLz0IZMMA+Yz//STFJBO/mDzll0Js+znxQTl2VOuTxOpZ4SQvPnp2jPxVW/+EaA1PCQhOvy0x2kkH1K+KPsIJkQvLG7XbS0C+qOqvmccjBRN0iwf0DD+tqjYVUZ/EkLd7vtQEKL00HMKkdErClQxRPD/1bTe1aw3OUfegjlohma7sjZCQPrD/7Z81oVOZfLBxTM9kYwx5DdvZP8K2g/v3qjtEac4oT71W/a3yLRGllWEuKf6d08Yq2LrR5jcNy22U0B9R0exyFKegatzOOCoyxzQ4/GRGNuRXdvdnzwZqUCxY4war/yVplduX9R8pq+wZZLvFF9T1AN13JSKbB82LG/D7dMgZpw+Av8ur98jpUn8RoTPWaLAyE
VFaYPSy5QT6vDHtXFXD8PVi2ET3uWpKCrVPRiy6sYGHB75XzN2MvXsqvRr7voBo4Rl4TXbZaznSxwxYLzHmIM8XzLekBxOGg+p6ROERQ0Bw0MYscv5TDPunfts+tIU2ykVfyfkt+4wyzX32uOseAi7rn40pXw2fixSAc8lBe3h7myKkGvkn2EkxmKsvs+6ML3TeoTherBgPi+8V3cCgIakdNXCyq7Dm9HeZ4yJEmgWaAHkLZq6C4ZmrJ2ZVXVFc8zxGao/IHFQCrsNMXa4WcnDdLKl/88v9A+W4nLQmDIcvU+rfQKGhBp2XbnEWrzewVw9d8ysuyeqiJyjvjjBIbLK+AZapva7xG74cN9FNuGWOdt7+pxiZes94+9ERUbT/Sxhdca+sGV1E9ueSv8Bw4FZ0l7qFOs2AyO65DUTekPwM3H84MMyRDXrVi733KMjduEnhjQtfoEYidQBuvpOUm5opb7xiVGgtDqtYU/P2D4Ztf1x+n/r7aZqytfI+8CJwKh9qhhgT5NKH4Bp/AuJVJqHsZIdUUNxrUhCprv8RU74Q1y3DimHkHr+yqr3LU7flZ1MnZQF+VZ51PgQTfhrGgsLCs73jPMgv9jLsRpNxs5K7EIThZUiiDMgdP4jicfrsI7e0XT9D9Nmpvwj2flU2pBkGNO9v+1YYpK2hb71KAxj9kE8KrKshiJHv9WU1RqRmWmIfvIvi+BjfaIMeywTCFcMKWFPret7zY8ZhJqvaowFoCyhNiLYWFjvKqeTZbeJti/O7AKjavWn8fa5LoHqGiIQeGjp5izIEbD79R7CaNNmE0suHKFFSjOqU1yrQQ8saoMkT3wHspM4A42gOD/HGFu0fNm/RGNZpBAqHmwOJ/6AhkKU2scSX0QXZrRXhLjABqvBo0z/OcCJSgTx3aUECmBfggWoSJcFUnfREqLlaecxEfme1ZKGkpNvJBwnNCscy9FQfQsgw0ryS1AzcUyX/VNgW/3ny4edpDK8dcVVmXJhft4c1yH+QLA1be0clmpLf64M8t4pkD0LGSXSNL7UqVHfkSiyaSWttwjdGmELYSQohx4nWEsPUO+tze1TYeBlsgVdH6UctVzFTuop5jLUVR3oHOBScFashAOHcDalzVcbzpJ0vn2n7YeCN51+5gPhgzWqd4bP1xNo0Gr0VTCVWpAqoTlRj6HekLhriSxZ6peiDarmlLp40AYGgViIf2Zh7uWa71YEDHlx+MT/VIQxPrSnnbCHGT+a2hzy54AoynEdL73gkCj8JgKce73cjtoPaFfRG+FLk/R+07JI+Al8cV07RwQysZSFZS1nkAvntcMgtBJLudhlc3IMP9k3l6EOxZqHJyZGoOwTfCMjLg2P7Z7SScEpykE+lzWFU0W6O+4OGpE8K/zYbErAdtF7JCLJLVA9AvKCyeDmQGdRGbhcPhnskFIGpL9RNHKZHAh8eTKQdE/Dk/K3+0wPGXD45iCk6lbgu9S5x0uE/kcVWDb3TfrvvoqycGwdnxALI7/lFVlb0sxrDrnNOuEG0canG+RKOKIPJZYa5FyVu1tXpr+2kYvvcsIwxVVzTl7/jOo4Fnmb8b/7QxXfZ3UVVLj+8N+P6M0qUCsiaE3pGnGy1HxEwfC7nIJfki3+tBIBa0hDnw1cxfQi32uvdlqyeV0/VX1O2tg1Dj3ihbMrG2KQ+YTKNjinDUA3QmF3K1ipKk2+xoilF8vuCQaEJVJIaDOOIwX4x+/4d0n1Q68MaLgw5mxK3dv1A8hr60kZ8fYPVvNkMCBeLo7cMVs4swMuVSjM6CxrQsBId/+JktBo7RHJakj7aeWdZ+g8ITxx54oNQutt9+h2QTKGpSDyU0j6kF61rn5M+H3MB1ZN8dLE22fcXjzHFGAKvJzJM/7w5LDQ/Oh/Q0Z66oDeacr+NtAjsok7FIn91NLerbGoy4rjKNc83qyoKwdDmhWrokeneCS5kqgTG2b17cGb1ynyBNKBFTqDtbnkFTK68vsJtP0hzN7hXf
INKOTGUEPdKTE5WPyt2ZrYOoz0VnA01EewJa7gXUec6x/kBp7X9240r01ywuRGrw5l+JXtiUmYjOteA7iSqWwbwqnFWwAlBgfXIvw9hcrVN3eXxX4K3fIucHS2ibM3KE/e1VviGkCU8/K2OMzQHOuiQiix9UPiwF4oUGRalHxBchirfetT5yIhGsdLah1X3CyDEaxUA6Cos6rV4gB6ouVnVVw8pqJVY5JX+rYc161tRLFVmtrQZhbstM9Gbc5dJpHJl6xql//rGdgAcEnv5jm2xe2BHY4Wn5y4P5PGeuNe1fKBcnLlgpjK4dHMB0NUfLH4o2E767ZXB8rfndv19ZMfhMIU2E2x+A0MZNhKhy2mefFaj+wQ0OVddKhEoXYMETtGaP0pn2jfwd/r8jBwn62zgNRmZFfhJ4OqbYydTuuhuZQyfZpLlF3uWxE8tqNWWzLRRWTZVwBAzKexPEuzsVIiKkrXX94kYze8kt8KcoDkN19jrSVyomHZMBk94OnNouLVONkXWkDxvDIbVMvXSJs+uqk7228DmhZplBwNSpaVg19q9Ny0vCvio98Dh78Pqi12XYaKRohe7RuJbzUrwunTW4hsV5xAreCy9n2DtRKWWI1v7rw4L/750nS1LtOJXUDbG0FLCpRyHmVhckad+YXGK/V6QtTtVDOp+DqH/7mlgeTkOjzuXej5i03PaZhez4eXw6Cozt0BqmbbaOK7aBv1GdZTWWVlQ7A7fnGGCxFoElmuksWIKIzhwqf89a0Lnk0TjF37f55mvnr5F3XVW4TlVUhsKhsHIANNqb/xKBFdxWSjMJg12V/5DeItXrcpr3pI8KJayTCpBOqbzcfhk+fMjMmDY6/+f1E+nMpqRYfzecMDYwHwpPV0F9DT5xzddj/vFPQMWgR46dz/jVaakX804jDbJ3xCVBGa4EpCLR3Br8Tmi7lA8RKoRgxEayH4PYHpI++Zi+VdU9X5R0ANvWmFqtzzv2XuCg4dPwIwFAfmeisnvis81lF4xei5s7bTlubyuMo13VKRbMAYj92exfPxrwl5N+9qbnmIzidl7/mmGq5pNHJ6zUOXizulKFbnpJw2S65Aun2jmaWdQinTF7Nv+Jxcd+4GSkkUPcQNhIwoE7rIF2PaLBSPFwEYkro/FnxsWElzk8z1ReQikPzMGh4+GnW2dzU0qF+G4X0CNiVewq1of+B6jQotyvLXtmsinINsLZ+EtE1J7ld4El1EMvTPD4hyVHmU5TMlKq320KlRFE9h33vszSAjEmhnM695IoF9R8jlHQ7uDJ7n05l1da3nugwlRewsC5sQtuOQ2+DQq2MKwGKDe/FckChLyWE04XHP+pDmSnNzjzjScWJswnucFfv+ThapwkyJHzGIU6kFd1RXXSnusEkker69Er4NvK4MIYUIqUBXBBIKdOCD/90q8FB/22tu7JITuKl6c3vPlcSI5zUNdClEl99ccvLc2nY9ggGVe028=', '__VIEWSTATEGENERATOR': '16D6DBB1', '__EVENTTARGET': 'MoreInfoList1$Pager', '__EVENTARGUMENT': page, '__VIEWSTATEENCRYPTED': '', } url = 'http://www.gxzbtb.cn/gxzbw/jyxx/{}/MoreInfo.aspx'.format( categoryId) response = requests.post( url=url, headers=self.headers, params=params, data=data, cookies=self.cookies).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) # time.sleep(3) # self.load_get(categoryId, types, page) else: 
print('第{}页'.format(page)) # div_ele_li = selector.xpath('//ul[@class="ewb-right-item"]/li') url_li = selector.xpath( '//table[@id="MoreInfoList1_DataGrid1"]/tr/td[2]/a/@href') # for div_ele in div_ele_li: for url in url_li: # div = etree.tostring(div_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') urls = 'http://www.gxzbtb.cn' + url # self.load_get_html(urls) if not self.rq.in_rset(urls): self.rq.add_to_rset(urls) self.rq.pull_to_rlist(urls) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [ gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count) ] gevent.joinall(spawns) except Exception as e: print(e) def run(self): # print(os.getppid()) threading.Thread(target=self.init).start() flag = 1 task_li = [ { 'categoryId': '001010/001010001', 'types': '001010001', 'all_page': flag }, { 'categoryId': '001010/001010002', 'types': '001010002', 'all_page': flag }, { 'categoryId': '001010/001010004', 'types': '001010004', 'all_page': flag }, { 'categoryId': '001001/001001001', 'types': '001001001', 'all_page': flag }, { 'categoryId': '001001/001001002', 'types': '001001002', 'all_page': flag }, { 'categoryId': '001001/001001004', 'types': '001001004', 'all_page': flag }, { 'categoryId': '001001/001001005', 'types': '001001005', 'all_page': flag }, { 'categoryId': '001004/001004001', 'types': '001004001', 'all_page': flag }, { 'categoryId': '001004/001004002', 'types': '001004002', 'all_page': flag }, { 'categoryId': '001004/001004004', 'types': '001004004', 'all_page': flag }, { 'categoryId': '001004/001004005', 'types': '001004005', 'all_page': flag }, { 'categoryId': '001007/001007001', 'types': '001007001', 'all_page': flag }, { 'categoryId': '001011/001011001', 'types': '001011001', 'all_page': flag }, { 'categoryId': '001011/001011002', 'types': '001011002', 'all_page': flag }, { 'categoryId': '001012/001012001', 'types': '001012001', 'all_page': flag }, ] count = 1 
for task in task_li: for page in range(1, task['all_page'] + 1, count): try: categoryId = task['categoryId'] types = task['types'] # self.load_get(categoryId, page) spawns = [ gevent.spawn(self.load_get, categoryId, types, page + i) for i in range(count) ] gevent.joinall(spawns) # print('第{}页'.format(page)) except Exception as e: print(e) if self.rq.r_len() > 0: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object): '''南宁公共资源交易信息网''' def __init__(self): name = 'nanning_nnggzy_net' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.cookies = { 'yunsuo_session_verify': '2c0b046605eb7acf81b64a462d5a88e3', 'ASP.NET_SessionId': 'k2oz1d45keci5055fe5br43f', '_gscu_1349052524': '33974463sf7nus87', '_gscbrs_1349052524': '1', '_gscs_1349052524': '3397446376zl7787^|pv:1', '__CSRFCOOKIE': 'e0612cbd-55e6-4892-9a1a-bad08d9eafed', } self.headers = { 'Connection': 'keep-alive', 'Cache-Control': 'max-age=0', 'Origin': 'http://www.nnggzy.net', 'Upgrade-Insecure-Requests': '1', 'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.nnggzy.net/nnzbwmanger/ShowInfo/more.aspx?categoryNum=001001001', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', } # self.session = requests.session() # pq = ProxyQueue() # self.pq_run = pq.run() # self.proxy_queue = pq.proxy_queue self.rq = Rdis_Queue(host='localhost', dblist='nanning_nnggzy_net_list1', dbset='nanning_nnggzy_net_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len (self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self,result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self,pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub(r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = 
(area_str.split('-')) if len(area_li) >=2 and area_li[1] !='': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, url): if url == None: return try: response = requests.get(url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) # print(url) # self.load_get_html(url) else: # print(url) title = selector.xpath('//span[@id="lblTitle"]//text()') if title != []: title = re.sub(r'\r|\n|\s','',''.join(title)) try: status = re.search(r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$', title).group() except: status = '公告' else: title = None status = '公告' _id = self.hash_to_md5(url) publish_date = selector.xpath('//td[@id="tdTitle"]/font[2]//text()') if publish_date != []: # publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',''.join(publish_date)).group() publish_date = re.sub(r'\/','-',re.search(r'(\d{8}|\d{4}\/\d+\/\d{1,2})',''.join(publish_date)).group()) # if '-' not in publish_date: # publish_date = '{}-{}-{}'.format(publish_date[0:4],publish_date[4:6], publish_date[6:8]) else: publish_date = None # print(publish_date, title) area_name = '广西-南宁' # area_name = '浙江-杭州' # print(area_name) source = 'http://www.nnggzy.net/' # print(url) # print(response) table_ele = selector.xpath('//table[@id="tblInfo"]') if table_ele != []: table_ele = table_ele[0] else: return content_html = etree.tostring(table_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '南宁公共资源交易中心' retult_dict['en_name'] = 'Nanning Public resource' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) 
self.save_to_mongo(retult_dict) def load_get(self,categoryId, types, page): try: params = ( ('categoryNum', types), ) data = { '__CSRFTOKEN': '/wEFJGUwNjEyY2JkLTU1ZTYtNDg5Mi05YTFhLWJhZDA4ZDllYWZlZA==', '__VIEWSTATE': 'ENWLp05tdLmofrQhOLBfOX9cVYZhmfSq4eEj5SOhG3tVllYGKwfS2gNjrfHqQfJt00vRa4vShYV8i/62pAOVCBYN6NwfCyeIgcbloQOvnnK1HovJnjtSKsT+IfP4pZPpdfEmcQrHQuVrp/c4MQkBmDNoJjrz4Nn2fx7yMCqrPxTvjhKVUwSnzvYeSCoiCV0swXlJan7hXMX6riIuKWwYCjcbZGQiMYHaJk2CIOtpIaILEHYCuQavxbUsTSQd+bEr6Uoq+U9UTHi/v4X+GvNREQOJkHpNcedt8lvkCzIpNIgCjeLIU55XGzTwkD2TE/QqVXLRkPZWZWyztjBOaqx0aN3WDsSzZO+tjgYc6tDIEX+f/o/nBNEWYpHVZb0tp4jMr9y5mPKn6q4uCQ3vAMAduJtL294HmwSSpiSeuRlsX/Epm2mk7NUbuOVuduji1cDQwkaU2lv0LFEY0IZHX52tpOKGQSOolksXGnsr+v2sLDxZ5SOKrlBu4eup1c+oYNbWZK68ytKsa1rJ0jcjjBYsET9bwr6Ps2+MytJtrTowfDuBDkM26PdZegUqdKSltqzkFu06Mo+oz4UazIB2ry5tb/62jTw3P9NNf24bOFG+YtbCOCbV7m4gqJ7OAcaPXFCEchokQd6ti+bkSTen4N1SceegmpAWcfkd55vFDY8bFF3iq0SXvUw3MOW22BhVoNyvguEZleEJr/y6fg/q+W0BTQGgmrruxJYwRXaxdrsbtx+3pIrue7P9dUZaJzlNJm+5TxFCp9D+2sOjnP5KKoG5tbntOHZK4iafoYC4za2YfnbWvW5lQ7ioDLPw0dIRPgV04rPG0cMPDWao7pAg95IBivlZKxQpjIDz871zLk5sYhEDFUH3pad6QlR3pD5elWsdc+4udjICLfJ+GSvcfJSdqc4aM5TRFZP6aTSFLUfn+An/OhTr50aYn/J0uO3arGlp0cvtn4/E27o71tuXbGuPYW3mrg2np9+XkFXLIfx5jJIi0eQtod3GKtILYhLgiDfsc5qCG3ueZy1QB7UnovrD3BU5oVbBMXHuzTNJUOrz7JakdrmXxgLEV3f0L2V5YrL9h8wmHuDmKPxORqxdaU/yejRJaEUjrjuBwC+rZsZouv7+o9Sg5F45qSjX+aZmWiwglICMdUdFQiT73B3ljjqeAZiCUoILeWM6dOEe6vSqxPYuELnnXSEimR0OssVqN7KHDmcq6Q+9dvyuzwGbAabNEN+TbtppMHloyNXZY6qLVTAgfbHr2Ir2tBTeHt07WXij4Cz/1/e/sUaF8Zl/Rgb1cC7GUIfA4AAyAW4a09zcej42E1N8ba3BcVLzBQ/ZrIZhFrX/ETaMMhCQMlGCIBZOn2nvItSDjpOfrlUxDfERHm+ftrsH44UtFpYP0HYR+0c4K6VISaunySinTWB9Rh6e6ax361mkRRNIs27AfMdqxByuVKeJii8nTUXEIFYaAozDczi4gDZrPXJZ/zAljjtBdfIaHFspPRDbe3Gdf3pZAWw9QzjuMkrppg2mDLh2wOJ//7mcKhMkBuXGJblNbFDzW7fdNvnSWF810t7hHL+v0kA1dZGw2Cl3RZD/IVTsc8JGGaKbpeN/AznRzMAH3/R9tXObE7nQ38J67dAtxmBtCdSD+IDqhumPWs9hlB3M0Jv/iJ8340VR4z/pjGe6Sy0OLyt4EscTB0mCc+AVnc5c94EpA71bWLT3z2zMhnoQTTlW1B2+8BJ5A0y4sue4wPP/KpII9x3A4WXc4r0pj4+xf8mom4PQip/9Cq
8SUFxA+1jZsKWEolrMkE2WXU7/Qyv6+CGwsclG65FPkUw7+eOcQseuj7tq2ChjQyIvR1/2z3Z74Vl8GRTf0zMSMVfdPhFqdiS1Lkk+DQ57XOXhBenN5IJRXPdxlE7TFWYCPG7FZACHuwxnuVdODhR7bWxB8Zq6ySYBqYkXIjCYuzjwFJkgTLPT6LyUXhUgkFdoyj7KU08VcqkZrft+2Fc4PJ8BiuoZ89ScQwKGhvGuf9qbMwAR8m7aKyT3ZSwS99eNIq80uEWX4DXV0z2ipB95IfgooB1eQOFUoRba43Ld/KHztxb1eUZszZZxoG4mS0S5YS5An83vrap1BdQM0t0qKxEkUAFHfM9HxqbyU7cJ4yVgizRPGKwXX7NCb7m+UbhVP7bUImNszweAekuvVS9mypOc7it54YW1N8YZJrv36XocUOSfg+Mw+sb744qLh9uQ5ihOzD6sD+Jgb+fY+VBtheVUNMfCRlbishYUBvKz1/SXdZVo2nBLL4iWRFxUiRJJwxXbknOlRVA2HTMPGKLm+YBQJrw3U5Vq3tlJqWcPLG+5/+k52m4CN1EQZyS4wL59RSyYfdPvy+thc7b634/HSBS12dv+I7v4Pjvh3dJKB2QioEQtAcY8RKdLd1LwT9B61L0+4QjU4SBTAcaZ4wBTQfqdl650sOMlnejc/kCCPYY0ejM7Ze3MhsSkHFcFuZdhfumQW6Vx3LFoiAYJVXYZabgiHAIyZO2zdHfBwDZrv6RH4OLKnaad1WePqE6yYhwhCWF/ePXq6slEwKO8aZa3x86t1U6Vio2pmvq+pkk9gjcjXZUTwweKbVn6V1aU+wW/sYQJp3J1n1IsjiYSQDcmYXJP1c3pRvvdYNmuVDAVLHvtQwGioULwXtVAeMwoRXYwGf4JPEVQZHMX13wejqIwlbvUeIuna5IR7NTAUO1DCePkvJ7sq/WbNM5AaCy0wMOD8wovuD80YXPCgkIe7FFXqrpJKaOMqSk6+TxuPUKKNYv7tzYOFgx7G1BUcmYTpbKKBsvCKTfz7RmdA0fKB+i3ZHKztbvbsuYBw46LYIj/dlUyf6XcVnWl/akMCUI6O0Tm9r0s+zCI6YYyR+VfrbpHTPtnTuitVLLkJgdwixOkP9bwjivvW2E8CxGg2XktW2umjDOBY7ps4iPxBncmrs5Q1f9FnzwBMs0VOJDlfeNjlWHfynn2XYA5/Of319cxK9uibdseUUMP2MyQLwMm/1+z7eKeq2y+vU/dZEvAoTkUPV0ugupubWnDwH/wT90mqm5eNCHXEbs8JQ5sNkqCCNVrEPIty9hoDSmiibSjmSDOqiXiej6UOF+N5e8Ux0Os+Tie7B0roZKHAPBlgV3+fQbRpcenbkJnrLCkjGaDxorC30FUFqmOW1wxjELB0+vSSeloW0CkvKKyoepHhEMajyXQC0C7atZc7pkALHGmYs3T/2B+EJ0ITWJoqzYdtCRTL1zPqQP/yNkLxSF7LztLAb2SMrXbIl4ow+u6BGycfil1kGEH5uxOREAL8RLLZOi0FpW+m8/IHKqvjgxffKQLfryKpmsk5SH1oscZ5lzGcFc3N6HTZHs1ZeSMjVkRLWE0DSaAcx6juoFJ6bqCxMnqnyONC/iB0xuOEAQ0xValejZUsXhiz0tTcO17yBTP1v0R/uNF7tyStYuGkWIDGXzNiNrDPM6bnprYQleqSxjs2Zwa3TxM5ePrlVLaWgQmTiEsf+262eMTblCDPnHsXRRbTWByq7MobtPVUjafUpLzE/WzNl7YVf+vW35UDiVemsV2judqeajuXiALujUedXjS2BfDpfyYllHOgVOvNQQB1ly1dKvG026Krsi4DDrpdJldbaxKzENq01T/oKT4l0ag8VsHIRhBOwhnP2NKQqc6klqbcynsShUdAAykD0quW02Xw/xxWHjMyD3NXj22W24ZJwTEIjePAV6v3S4h7zPsFp53ok7zbhHENi7sAa5kIZx9AlyguXJqGEzFCy03m0
rW+1aRWdzd+ZnhVcubVsdABsAXMpIt0hG0oR94fJr1jbRADnZhMXE02d0NSbEKHcDoFMSi8ryUmi8RGWUy56VS2d4W3wRoUZ6QGDn6rEUlOHuanuAYIBBFsRfX6EHs0dEM4eXd+du1lqo0lXY+64KcSEywAu4HoAMkN+I5S3ojdVZP0HSCqE8AcyG2O+rV2cuJy55O4896HNPdMQvFnY4SyGjU0cvx9UKTi5wx+A+s0Rb7kfUmJQ9gGNTFebyCk+Z3M1oMCdfcGiDBBt3F+c4pZjCYEwBmSxVz6NoH6nRSEd71JcLvxMtJtW7lgzTbF15Uc08UmXfFGpZ1s5pg22k+OstSqUXO5TjoPyMoFLmmHeNxkX0HVk5XehlCWloHUYQk3nwGnUvXNIguY65yCCI0EJ3HY7GajyAZMQGntqb1vkmqSCWnlK1MV9EMW9Dm+5pF8lYSFqPWglPaU6QzWVyUfK1MAE6OXTOxBotQe29GGr0CErAcM8TeFQ9Rd4/grSpExwKVyUZrl8stZXbSxKMqMjealLAstbB9jIrQ6cJ1ThqOaabVFB6/DwBRqsRxjNn6/1NynP1WivrP0LT4d8lLUPXm/JQDqrF4/mFmZP4LMdmUx0ni7o1MRX0iMNYIMEkRrJzKdCsqVZly5AeGKzdIU11bYQfOlmU8JG1bSww12ci6d97pBhhASMPM3DQlm/N/m8BjjooglCiOgA6onr9NsVoTeUy6vqzWTRNnpE/dNH60ityKU9EB8ojOgBR1+omtvgbRAXcv4eh3zBgN+2rtAWXWMZl5xZiWldFPXe/Rp2AkSuhPgHw7KesAixhgQ409wtCy1sKbJRDYHxSTxlzqIa0zZPnDq+K+yJ851UG25CgQCgrAHdNoDjL/FfkkoD4DQ0ZLC6aXRPl2p+eaDNjNzzFC1m4xwpf532gmabiDrU3Jbqr8kOefR4KYSEHu7U0+zmmhz0KN7tbJ8AztoyolRXxPmvfPuVK3d5FecLPc4vG0dfyPI9TaRwBxWa1czJcW+xp3LPYTulGqHjU6RQEm+OMFrun1L067L88VCbwl1niEkzAbsgiUiaDL5IrtozIKwhN6KR0ytQUpbCEQvW5DAJteaiiu2wuouoCUmD6IbQJlge51yex8DfrojDxfYxvHzEWFv9xj8eq8DuSh4h6frFxhZtL/tpZxh33AJ+lBVrb89G2DAqbzKSnbTCaI55QXuMH3IBuVkqjBua+Z7AnEqGkEqV7JHEZsuojcpnUH/bWZQqBjRrH3hts6R+A7FC1EgYeF117OdzeIxHGsioFCt8qvMlrT+Ihr71+DLGzeL7xm4ZP1PSYyIruaR2xP4oRIQ6wJGbkLG1erqM7nIfyGMo3NK09J2BcK5JqeeooMY7pPDiMYIrXtfWi2vyFL1MnBIXYXuVjQEehV6rE0gp+x7d2S9UPzdyt6ihrTNi4E3cmX6wj1qK1LkHmgA4zvfx2Fvj9l9rCjOe4DHNNlSl691RVk7xmRnIuTrbNEXk6MEbCD4XmbRdHn3kA2fWwJsScwLmHJdvsyJGzNKu4aMiuuFa2a/8mocXFdcvJz3WvVyhxHxRqUj0J7DlTgwuOiZCYNHGD+mVMl8rn80/d+UzsUdGEMdlLB1HfEIarGe2//EsBx8Bz5ohIHAnXERuvCUGAxLNka7g4qhCvXM9GX4fRMqivAHWIl+znUGKDF4/7aSpMY+aOdiFCa4wL1X8UEijsVNR43Aw8aZBBjVULAVy6vsYRmSX5Jn9f6ImbPiiQHf3M1Ux5hsMHp5+EZGAuW5DHM0Ey36iMXLfzXhX+ckJ2qB2JWKWEtfcNZQq1h/NakvZTMdvy9EH324lC5DAerKM4S+cTGPPIZda7YAnbTC+OaeSZWzFsNdnZWAoZiG57ia3XXF7zuGTWoVc5Cqv8CiGOpt4B8jQg4VRQznKVR+Vof1DoohzHKBw9kyvAx9ILGXd7WnqdtelwWhGO+aDcJkJoBqW8XjzXIw1q7bJ
Y/PXA69dbpwuT/EwsCNpcvqk1put80GrJeE4VRijcprs6X6iPsCJslNWlWmE6JaVJgg44EPkSIogapPjzM4KbtHdWWrrF1VoUkr8tqH0O/WW1sl3kbr716Kcs+ZgBc8jiXSVp8gSpDHiad8SPXrjtTn0G1NY+CIw8EsYoJWrD2RCWxsa7PGJf+qa16B6UJe9R4Yl5l2BapFFeDsoD9lprf91z6OaOjE9vQVzRVzgBs12evLo4SncT27O1sfvBhfnuQ8XfRGpaZeE5aF/4VxOwAbVNatdRqEi9O3aSBCECgMY6mOwhnuy1/aJ8d3AGQdSRttqo5QET7zwPEIT6LaOQM+ZE/2Nok1zVa4+PA1Dbht8BDO4RD6xljBkoYiO3sb21Aogm3P2+xUkl5cJdx+UBWWOrHZkKNGuuy0U9gQ4yHpBDlSdw+EhQ5Xjkc27FL2jeQ+8GfcDdUC6CyQxYdoCVMTgU838S4e+XCq4xibtZ5RU+Ly1WNb0wgJeNbi6WPmuEdkzQ4jYmOt5YWTuM9jAzfgjSZPcmD89p+KD6P1g+KO2aU+pSYyANDpwPwrzg9+qQ8LFKo3g2Ctg4Ns5av5H8rIqExKkr4U6sSJQyzj6BOGx4aNDlRw9+zGeT2SugkaQ+nv0z67AdvUNBeYmwiEtm6GFfNqZ49/ugdrmTNQGY11ESBICili9nkj/fKpA9EqVfb4JknmbAPx6eekT88+FAcfqr+9MWrmchPQkmHkGT6aGSesKEBF/Duj1PAC9IqyXhI/wu5EuBuQchRND+xQyBOpzVbMEFo/cYabGi5jAbGxhzTzJ0RBmv+uHEVpshzsSMrUvMHhhV+MVYxCjcm0WXIHQy+4xG6tlCEpKj3fUg3HUpXdlenDAphGkqz++e7doW2rXLrW3Nojsk1NzoXj1vjSCbOWCFagh5oeUlELJOdIiU76RlOKcp2ymLPZMNyjavz0lgqpfMrqNr9sC82xLN5pu64LrFiow7Go7oR6lM5lWoM+T28dRnijTf4Yh8qhcjqVXhC0IU1eWKQ4lCV1KNYD64svpkXqWw16I4uJJ4LWN5Xz/4lTLqv/Dit74w77VB2bB2ndDH/F+Zjp9cyB28jWHtqOmeCmXXFjT4UNQn67wKcDT0+qqhum08I3NSzZ8m4Lu10rCfISQlplVrpu8DZx/fVl3g5TnxOYtSG81AYs5gunv2qPn/zega1hUdkLF3UGeVdVwKLziEv4dosEsRvVOWeSb5dwi28Z3xjRbQjRIwu+X8kfY7spMPTRJszAXLFpRDYvC+SaKKf4bjg+xyuN5zzrRgKLZkoMa0FSY1ErDmmH4DrGRZODDvKNeGnJhPlvYOZOYl4xJe6KgggUxFfyTKhg/N+HcZMxj91jlb+VZmWZkJ8FehLIiFWHZIRL1fVUDRzZ5/sidEfbsMQUK7x2o2emqHPqbTQa304fLiCYwVxdrwVxtRmBw6mfH8faVzMTdRIDWRgDIdBn9Th1ZchZy1UoiW4jdeeGS9CXsmySuLt7UTXT0PNrjfq18dldg2kVQM5Wjj4u0PWeMwQI265DF30bMAYfORb7mNEPMCCfG8nVUeTZryY3bSuJRPK/9eJXn62S8MTw5AMyLmw0XFOpGS6FjmUVE2OgtpPRuioqMFdEePfOV4k5Q1PsLsuKeYjK77KJQpNLH1R44yIOiiNqYR0INEAv2IZdDBepT61XVqU2CM1DzZbhzZbfBnNHdW84qZsaVNRHUV35hPZXpwwD6XE99fCaBbuT9e7biAyYbC8hEU9q+Jm+8cT7Xjn0/xJnVmO0K3wRGzUh6J6IBbUjsWJ4w20IASj7nHDKiLlG+nxL/xVwatvKsokbJpxojvxilKYyUKb/c9ywKd+oliYEsgIF3yKh+h6ZtmOyWZbEE8tdY4K+/yMGCx1MyCPGR0OozvwNLTEYm3GyWvCX5dyrPRPcLZ6AqgcGny744/l7HZWAGsbhaRyaA093l3Xvq3uuTa0ZG9PshZ+eM
e0DDnGzUrI/TiQl9/5oGsQrZcnIMazVtHJSg2Wm0mEJmCn8cMAFiVHitlPtvtMLD3xNDF1IVCWXDYRxBiWwRG1EVwvyQt8A6jfc6svYHcKBr3EI9tre+bEu/ejdQ6r8PJjdJGEE/dhy6fwFmj8vZfLy9KT7GyJUXCJ9fpyj6tKgAwQwndnGICc1hgx5hC4q30znuAm/350a8X6CiYoTK8DIMc1PJR9QsRgEUabCH+aaXnoYy+DkScgpV3XFiUIfjWwrYkO0F9JHoJnSE9g+kKGcJ1Y8o2nsX7rbkfSCbw6PfiU3LpiO7CP+dnV1Hhrfy+Pnd1Dn03pfbCCVi7J8ZyWO891RMVh5t1cWiTvCiu8UhoE6GP5qLI3+zJtl1ANlGo915hJJCZTTHQ9t+9Gpn9oPsnnUkcAT/5GJKjv5fGG2dinkqToYtBVoo1pOJPjsCU1GsbW9/vXb3CUE4oLj4TyVrxtx8mYG9+FChmEOXXjyNk2X0TYb6loXn15yh0WfNmAcau4xHWlXAQBanVljzFNyXmSWzRvNCz/dJdLKELEz/aQ+agxhwjBD8S5dwMCkeas8uVTPNveJSf8Rva6HcUujGSi67RDlq6WfTC9Lxbnsv6vWURc4E2a+KNp4bH044H6IUDxm3LE8M64hL/+E2taoURouHGTM2ZRe9a7hejwkKDtMM/8keVUEfySrI7h6OReilVhOiSbXFWrB8kkwCG4NIe80vh59jRalYXvQj6G8vCXZHXuXvDHNabFmW8lw5HAvEFghyhAapVgCyPGHSACwfemUfMIlq8UX8C3fv4NQ9UjdJBonk+R9i/RwsRWwj/g0j/MfI+Uq33Sw/OkGuPQfvuObMkP74QRjJeRa6gwC49qsXLyPpkKW23hx4VI7dH8GsgzVosz8eXiufjjm6KteBnu8gV0EK4+kllvZzLJ1gILXRsC+LzGglNspk36cxJ3HqGvjU+DVPng2fbq0vkk0m9je0qeGB40GkzeCdbS3yJDcnm7HgTzRotTOL/peeEgBYctZqR00a9Gp3C4Sy36hSKd8gfJZKU/gq6LgXR+Nr+JgKfzrY6Sj0xiSNrhDLDrh9PJipSVhfOZNwFeqR2S5pcrUEN4BHRf+HrtF9Lno/WxSRPGhyQvY28mt0LQV9cqMIj9wxPKz3JefbKS7ILWYPNodvUDJWaUUmGoho+1FqIdvbSvWGbHCJRq82V6Irk1X8dRXEZyTXGqOhNGm1RWuTsYOijFPaCK9OQs5Tlun/FTcX9UH/uzVBRcAzXLreD/Tld50KBX/lFVQ6P5bpVQKFUEaekO8IJCK1m7fn5PUyCtNPrEQj0MhzPJ8559Dw8hr9++Hhmd1CJQT5ByusUcrxuS7yMmLFUvMy4bDS6VwTX9GJ3QSSbcB9SU9tS3eu/RklHAdHPF1AdNU8O/gKQH8t46L7Guttkofhi+p2YUWc2DnGVIyK0tme0xSyh5EYmqV0gkoqphzlgNvyfErKJwwt4WIM6inqIl8VE4ZSfyHlzdzN0KeBOKWtclt4TVaseFfDXSNUtxQDbv7lo8NlyOS+5588XCMXVVkyBmSCOu6v79l2AZRlTKfN+TIGDvVrxk9f+E8AL6GbJPKe39fmX4XNRWij+CLbTaiZOKvS6RE1cKP8shBB5rVMnpBGfqIRgmaKVs0pxDzsGilnf6+TDP6NgJQMNmRhjnnWQ7u50WHcV//1vVsNKNjEABuErRcgPTLbCXGOs9jLesg5RMYfr5ZcCYyng1it+tzq0GWaUosIOlKoQFDJQrYmIJwh1zq4M4xSw3ed5nYSZQnM1uOFS4EKaNBo9dT4qFh59156kj4JrVKHbhwr8pj6bwLmu4x7Qp3e7WF0GIv9Zv47um3hk/iFGhyosTtw3wyzky+r1X0PxW1hEIdJ8FZtGd6jQXEXrax7ng3RK/DcgcHGgrHG0wjHX1sF0LsFmPT2kgB61L6G+2BLjBtvoFofPAB5SiUjKli/4p1HIdtMGpsc
12UAFYYyACK6fB2OQidLPjsnqImq7PrfJKjJrFKY5DQC6tvi3U6H/J6hJfedMGKxbwhEX6ncatJFCO0wxeknSUBfsXrKkiGq2McXHk6KtzdpohonTVS/b90IeOdju3vGvVPIHEyYpygbOS/6+mFBs29s20eomFzXyFzoZwBEeq4zgGDx3FtbDtH2/htWPgfzstnWwN/bnS0mMJoZwZIwhv7X95vrTIntb1QN7KrsRju0GkM8wWi0BRkdzaXS7+5kBxkZYCm1xvgdhgF1RdCMnsXjpH5Y7zZB/XwYXOYZ8QH7HC5d6Edqnp7a/Gs2dLUS9ie9XG2PX3ozrX7HD73reZBk4WuRj41vwtudVQazcR7qCIqtvK7ZkPQyS/PUYtJuw2yp2XXR9re6HIQ6chIKhRWx1NHr9ST9b0nv6JU4PDk3JKNUtSkt/oGC41pxEYV7DTZyM2giW6C7t9q0+zhiY7cOY88rqFa0pLWZ4MNe8VJGQH1ANvh7Vy3P/4IPMY/Xf/EypgV9zD44IpvRD+qP2cp+cGORi8+Pi1QkSJ9C1F7OKzJXb8qdMPE9IlwNyU2DAa6D+G0Fqh7hNN8NZb4jm8hT35IgJUkLBLuIaJxSzKcnwsctTZRPbzkJ2erenCFA2D/PlBfUGDVJ4fMLtzOZ/5/KYQ95Z5CdFsE0UQN74mALE08Qqh+pmLTi9KR5lXtGOMVN++3AGIeE/VFncOdeSQcMrAEdnkBHjFdhsDa6iZvJtQhIKxRCpy1OF/blvyDgijDB876WbV1X9dReLVPz1cDZqj/18mZKW2seYoBVfsMTyHmDoKY65wAe3pftO+nDfbc/Clsm/V70tvqMYMN6qUneHVGXIGQcxv2K+DnXwlb49vdtC541m+5ViNt0VD5Mku/cZzxM/5z2gdDR0w8pNsHMUGD8dCu6fwaO8I+/qmhB4oP3VZNX4X48QshIAmsgsqdZ3SDQAIj452a5LZSLELyS+VkfC1Fa1KZ4WUXPf/LWGD5IXphba+AthqnNK+/mmGRvkS6RgV56o81rfmNZx/qbozYHoxEwc1XUdWyPF9QM0qkdelmGlcfMrB+QGCGYXQxWwk5NfcvqwjVxGsH0maJaFt3clwV0r/KgKCphG2ehJ4GWfp5OmazDCLWVmtVF+HjXYctPfIvSaaPjHv4fcRiD9m/+iY7WBfvUWb22Cwvu1LwaiPw6HNKl9uJD5C2px7gpTM509ZKgrOD2EhT6XihdEv5P72o7ay6f06/adMU/vPXqxuvdrTolGkvqhVqHU7kvgJmJ5hGLkocfr5SSQo0no3g9e6VutYpzFEwg+rfzZWfN0fc3ejCK6ajPvT6Ztp7+PhsJgNWcU4WfL+JSZcDUNE6Gq7Wo5cy0jbQszKQcxQRBhsZ1ewGyE2f6xwrJ+uoEEoACn2oZLbaAGJUoSMKOSep6lU2f9yfgWX400J/FQq0bt/+3DIE22hA8xbdD2SNtbp0xrkNJEsMH8pevSE6Sxt7f8dqKNUfq+LOGJ3Pacl0uoQbSB0oEJa//TI77KvbVJP1rtiOFZVW6ZB9nKQCBAMGqjPpPfbyAi3FvKpp3tVktbMOEjP2hlAE52t5IS0yXwmyuPasCbqjn7R1Ws0O6ner2W37BDvFUFgfdTMEJOS8trekVQJcw0MbTahtpLiYkPSCsEdph2IpdCqD7x+Yj/pgerwBz9hzlRkx4080cksWNYAf2yJOUy2wIkcLFVarxQNDSzmptKyCY2gUMNLvbSM7A2d21jHJ5KbVYvi/ZMRX83UxG7MTmU72vm9iQJHaQs0afHZghZwI6mduPEEA7rNkr41RcCnikhFhIy9s+GF5HchGW+xzEqMgoppXAxckyT0+EKuwwfpmOuaxhghJLS0h5CSoV/wkVVtX22CqKDpn8qGo4EtbdHVHcDZUd++1YHYn7r/iwVF1ThD3ZCKYHhu0CZBvm0DEZNEOipnNsIHZ7AINYFFGUaAFYXMF9JBxUFXJkn0jTaVRg
lAHyW+T6WRgMy+9lJhbF/3eCKi8F4/t8iArkkT37h0pgnsWaPJK8jBFnLXc6onwlg9KciYKCIvjcC48iwK3nJPvZowyNoYQK/9Zm36rXXnXJUj38A0TegB0rbCmVGaN9pwVUSrhjdDZ9m4WVswNJD5IEZjAZkEOcsRE+G7lIEqiPdFZUr5u+wWLdv5B/7fu6EGQiHYDTOSXtp5GRdJSayg8gDVkeEgqCs8YFsDUwtg32Y6vKZpmU7zdrpd0OAaouN5r/buoGCqzn9yVrethR1XLmyL3HWD+C7/9U1Ym5pInakUacwVQl/emzJwRzazJG4ezqARdo/pifOOlS/oDNd9n5b9dcu1ibq8D3va2EpPuRpsKG1jSqfafZR2iuCbtaSb9JW2NFQ615xJRltHTujWyQSIq5OcMTtCKFGJ8Rkc/HMaPQSw03KD4PtZlMckeHcfx39qEU8y5nACalGWgr8CAQaHSfNb7UdGuZ8H57ufOvMZTring3umxPIKb5HVPQBYmNYicfMnge8v/EL+c/7RaBlnnGvf4EXza8lT/vqSsc7pLZegLyBnzSLYBQ2v6psJpa9WukqZ6dOpzjaLL4hUVA2fJryd3r/m4qPAhH7jP5n2D6NDYcaXy4ytCPiAMdbSt7sFHVfPpKldbVd2eLWuKkXq3kTXx0E7TosdLRUpyES6HQPptIZujEaEvbQ6PZJ6EBYwdGt+mxI9zEOMALaCTE5NUQ+0sINoFp+6BVbiXARCTQ09Or18XYYHpAm5jPLmF1zOveJ2NmtR/dzsjbdFQe04NmWroPeX8M8/H308CfyIoUZpFTGR7auT3F7mm1Tl7XlVZOVWG68D4z6kmtn6PyEilq32JOPzjyBVBEKWm/l3j2wy9Bj/ulrB3hfGDxUjRIYNhalz/S2XV1TKa+Oa/jti3PYxjJ3+rTCffSJq+LZm0MFTk8eQLGW6cGJOFZfFaYvdoI/XalfSvQkkZA62jAh6R5C7XbVebNsoqNcYSCB2IyRt15LIRf1eXbBO5Cxr8cY/If7GlycO3k5jOAeT8shklFvlDIAmYbHCpiD9JAY01woeqbk8XX9xBpKV6RVDRPLUBeJQ', '__EVENTTARGET': 'MoreInfoList1$Pager', '__EVENTARGUMENT': str(page), '__VIEWSTATEENCRYPTED': '', } url = 'http://www.nnggzy.net/nnzbwmanger/ShowInfo/more.aspx' response = requests.post(url=url, headers=self.headers, data=data, params=params, cookies=self.cookies).text selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) # time.sleep(3) # self.load_get(categoryId, types, page) else: print('第{}页'.format(page)) # div_ele_li = selector.xpath('//ul[@class="ewb-right-item"]/li') url_li = selector.xpath('//td[@id="MoreInfoList1_tdcontent"]//a/@href') # for div_ele in div_ele_li: for url in url_li: urls = 'http://www.nnggzy.net' + url # print(urls) # for data_dic in response_li: # div = etree.tostring(div_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') # print(data_dic) # self.load_get_html(urls) if not self.rq.in_rset(urls): 
self.rq.add_to_rset(urls) self.rq.pull_to_rlist(urls) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count)] gevent.joinall(spawns) except Exception as e: print(e) def run(self): # print(os.getppid()) threading.Thread(target=self.init).start() task_li = [ {'categoryId':'', 'types':'001001001','all_page': 1}, {'categoryId':'', 'types':'001001002','all_page': 2}, {'categoryId':'', 'types':'001001004','all_page': 1}, {'categoryId':'', 'types':'001001005','all_page': 2}, {'categoryId':'', 'types':'001001006','all_page': 1}, {'categoryId':'', 'types':'001004001','all_page': 2}, {'categoryId':'', 'types':'001004002','all_page': 1}, {'categoryId':'', 'types':'001004004','all_page': 2}, {'categoryId':'', 'types':'001010001','all_page': 1}, ] count = 2 for task in task_li: for page in range(1, task['all_page'] + 1, count): try: categoryId = task['categoryId'] types = task['types'] # self.load_get(categoryId, page) spawns = [gevent.spawn(self.load_get, categoryId, types, page + i) for i in range(count)] gevent.joinall(spawns) # print('第{}页'.format(page)) except Exception as e: print(e) if self.rq.r_len() > 10: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object): '''广东采购电子商城''' def __init__(self): name = 'guangdong_gpcgd_com' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Connection': 'keep-alive', 'Cache-Control': 'max-age=0', 'Origin': 'http://www.gpcgd.com', 'Upgrade-Insecure-Requests': '1', 'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.gpcgd.com/gpcgd/portal/portal-news^!list', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', } # self.session = requests.session() # pq = ProxyQueue() # self.pq_run = pq.run() # self.proxy_queue = pq.proxy_queue self.rq = Rdis_Queue(host='localhost', dblist='guangdong_gpcgd_com_list1', dbset='guangdong_gpcgd_com_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len (self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self,result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self,pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub(r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >=2 and area_li[1] !='': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, pid): if pid == None: return try: url = 'http://www.gpcgd.com/gpcgd/portal/portal-news!detailNews?portalNews.id={}'.format(pid) response = requests.get(url=url, 
headers=self.headers).content.decode('utf-8') print(url) selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) # print(url) # self.load_get_html(url) else: # print(url) title = selector.xpath('//div[@class="pub_title"]/h1/text()') if title != []: title = re.sub(r'\r|\n|\s','',''.join(title)) try: status = re.search(r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$', title).group() except: status = '公告' else: title = None status = '公告' _id = self.hash_to_md5(url) publish_date = selector.xpath('//div[@class="pub_note"]//text()') if publish_date != []: publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',''.join(publish_date)).group() # publish_date = re.sub(r'\/','-',re.search(r'(\d{8}|\d{4}\/\d+\/\d{1,2})',''.join(publish_date)).group()) # if '-' not in publish_date: # publish_date = '{}-{}-{}'.format(publish_date[0:4],publish_date[4:6], publish_date[6:8]) else: publish_date = None print(publish_date, title) area_name = '广东' # print(area_name) source = 'http://www.gpcgd.com/' # print(url) # print(response) table_ele = selector.xpath('//div[@class="pub_cont_details"]') if table_ele != []: table_ele = table_ele[0] else: return content_html = etree.tostring(table_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '广东省政府采购中心' retult_dict['en_name'] = 'Guangdong Government Procurement Center' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self,categoryId, types, page): try: data = [ ('portalNews.typeId', types), ('pageNum', page), ] url = 
'http://www.gpcgd.com/gpcgd/portal/portal-news!list' response = requests.post(url=url, headers=self.headers, data=data).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) # time.sleep(3) # self.load_get(categoryId, types, page) else: print('第{}页'.format(page)) # div_ele_li = selector.xpath('//tbody[@id="bulletininfotable_table_body"]/tr') url_li = re.findall(r'onclick\=\"detailNews\(\'(.*?)\'\)\"',response) # for div_ele in div_ele_li: for pid in url_li: # for data_dic in response_li: # div = etree.tostring(div_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') # urls = 'http://www.jngp.gov.cn{}'.format(url) # print(data_dic) # self.load_get_html(pid) if not self.rq.in_rset(pid): self.rq.add_to_rset(pid) self.rq.pull_to_rlist(pid) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count)] gevent.joinall(spawns) except Exception as e: print(e) def run(self): # print(os.getppid()) threading.Thread(target=self.init).start() task_li = [ {'categoryId':'', 'types':'90011','all_page': 1}, {'categoryId':'', 'types':'90013','all_page': 1}, {'categoryId':'', 'types':'40011','all_page': 2}, {'categoryId':'', 'types':'40012','all_page': 2}, {'categoryId':'', 'types':'40013','all_page': 1}, {'categoryId':'', 'types':'40014','all_page': 1}, {'categoryId':'', 'types':'40015','all_page': 1}, {'categoryId':'', 'types':'40016','all_page': 1}, ] count = 2 for task in task_li: for page in range(1, task['all_page'] + 1, count): try: categoryId = task['categoryId'] types = task['types'] # self.load_get(categoryId, page) spawns = [gevent.spawn(self.load_get, categoryId, types, page + i) for i in range(count)] gevent.joinall(spawns) # print('第{}页'.format(page)) except Exception as e: print(e) if self.rq.r_len() > 10: threading.Thread(target=self.init).start() def main(self): 
self.run()
class GovBuy(object): '''中央政府采购网''' def __init__(self): name = 'center_zycg_gov_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.headers = { 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19', # 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html, */*', # 'Referer': 'http://www.zycg.gov.cn/article/article_search?catalog=StockAffiche&keyword=&page=2', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', 'If-None-Match': 'a872927a5615e69c3447de47a43544aa', 'X-Requested-With': 'XMLHttpRequest', } self.session = requests.session() self.rq = Rdis_Queue(host='localhost', dblist='central_list1', dbset='central_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self, result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self, pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub( r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >= 2 and area_li[1] != '': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, li): sele_li = etree.HTML(li) time.sleep(0.5) # url = 'http://www.zycg.gov.cn/article/show/311865' if li == None: return try: url_li = sele_li.xpath('//li/a/@href') url = 'http://www.zycg.gov.cn' + url_li[0] # url = 
'http://www.zycg.gov.cn/article/show/527813' proxies = proxy_pool.proxies() response = requests.get(url=url, headers=self.headers, proxies=proxies, timeout=10).text selector = etree.HTML(response) if '打印预览' in response: url_li = selector.xpath('//span[@id="btnPrint"]/a/@href') url = 'http://www.zycg.gov.cn' + url_li[0] response = requests.get(url=url, headers=self.headers, proxies=proxies, timeout=10).text except Exception as e: print('laod_get_html error:{}'.format(e)) self.rq.pull_to_rlist(li) else: print(url) title = sele_li.xpath('//li/a/@title') if title != []: title = re.sub(r'\r|\n|\s', '', ''.join(title)) else: title = '' try: status = re.search(r'[\u4e00-\u9fa5]{2}公告$', title).group() except: status = '公告' _id = self.hash_to_md5(url) publish_date = sele_li.xpath('//li/span/text()') if publish_date != []: publish_date = re.sub(r'\r|\n|\s|\[|\]', '', ''.join(publish_date)) else: publish_date = '' print(publish_date, title) # print(response) soup = BeautifulSoup(response) content_html = soup.find(class_="detail_gg") if content_html == None: content_html = soup.find(class_='frame-pane') if content_html == None: content_html = soup.find(name='Frm_Order') if content_html == None: print(content_html) return # print('content_html',content_html) # print(response) retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = '中央' retult_dict['source'] = 'http://www.zycg.gov.cn/' retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '中央政府采购网' retult_dict['en_name'] = 'Central Government Procurement' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self, page): try: params = { 'category_id': '', 'keyword': '', 'page': str(page), 'px': '2' } url = 'http://www.zycg.gov.cn/article/article_search' proxies = 
proxy_pool.proxies() response = requests.post(url=url, headers=self.headers, params=params, proxies=proxies, timeout=10).text selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) self.load_get(page) else: li_ele_li = selector.xpath('//ul[@class="lby-list"]/li') print('第{}页'.format(page)) for li_ele in li_ele_li: li = etree.tostring(li_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') self.load_get_html(li) def run(self): task_li = [ { 'all_page': 30 }, ] count = 1 for task in task_li: try: for page in range(1, task['all_page'] + 1, count): self.load_get(page) # spawns = [gevent.spawn(self.load_get, page + i) for i in range(count)] # gevent.joinall(spawns) except Exception as e: print(e) def main(self): self.run()
class GovBuy(object):
    """海南政府采购网 crawler (www.ccgp-hainan.gov.cn).

    Walks the announcement list pages, de-duplicates detail URLs through a
    Redis set, queues them in a Redis list, then scrapes each detail page
    and stores one record per announcement in MongoDB.

    NOTE(review): several classes in this module share the name ``GovBuy``;
    in a single module only the last definition survives — confirm each
    crawler is meant to live in its own script.
    """

    def __init__(self):
        name = 'hainan_ccgp-hainan_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'http://www.ccgp-hainan.gov.cn/thirdparty/My97DatePicker/My97DatePicker.html',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'X-DevTools-Emulate-Network-Conditions-Client-Id': 'EAC4BA3425D26FC6B117994EFF4DEC28',
        }
        self.session = requests.session()
        self.rq = Rdis_Queue(host='localhost', dblist='hainan_list1',
                             dbset='hainan_set1')

    def is_running(self):
        """Return False once the URL queue is drained but the seen-set is non-empty."""
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        return True

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the MongoDB ``_id``."""
        m = hashlib.md5()
        m.update(sign_str.encode('utf-8'))
        return m.hexdigest()

    def now_time(self):
        """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def get_area(self, pro, strs):
        """Best-effort 'province-city' extraction from *strs*.

        Falls back to the default province *pro* when nothing is recognised;
        returns None when ``transform`` itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            pass  # odd titles can break transform(); caller tolerates None
        else:
            area_li = [pro] if area_str == '' else area_str.split('-')
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            return area_li[0]

    def save_to_mongo(self, result_dic):
        """Persist one record, then refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def load_get_html(self, url):
        """Fetch one announcement detail page, parse it and store the record."""
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            # BUG FIX: the original 'laod_get_html error:'.format(e) had no
            # {} placeholder, so the exception text was never printed.
            print('load_get_html error:{}'.format(e))
        else:
            title = selector.xpath('//div[@class="nei03_02"]/div[1]/text()')
            if title != []:
                title = title[0]
                try:
                    status = re.search(r'[\u4e00-\u9fa5]{2}公告$', title).group()
                except Exception:
                    status = '公告'  # default when the title has no suffix
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath(
                '//div[@class="nei03_02"]/div[2]//text()')
            if publish_date != []:
                publish_date = re.search(r'(\d+\-\d+\-\d+)',
                                         ''.join(publish_date)).group()
            else:
                publish_date = None
            # Default parser kept as in the original — pinning one (e.g.
            # 'html.parser') could change the extracted fragment.
            soup = BeautifulSoup(response)
            content_html = soup.find(class_='nei03_02')
            source = 'http://www.ccgp-hainan.gov.cn/'
            area_name = self.get_area('海南', title)
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['publish_date'] = publish_date
            retult_dict['source'] = source
            retult_dict['area_name'] = area_name
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '中国海南政府采购网 '  # trailing space preserved
            retult_dict['en_name'] = 'Hainan Province Government Procurement'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, params):
        """Fetch one list page and enqueue every unseen detail URL."""
        try:
            url = 'http://www.ccgp-hainan.gov.cn/cgw/cgw_list.jsp'
            response = self.session.get(url=url, headers=self.headers,
                                        params=params).content.decode('utf-8')
            selector = etree.HTML(response)
            url_li = selector.xpath(
                '//div[@class="nei02_04_01"]/ul/li/em/a/@href')
        except Exception:
            # BUG FIX: message used to say 'load_post error' in this GET method.
            print('load_get error')
        else:
            for url in url_li:
                url = 'http://www.ccgp-hainan.gov.cn' + url
                if not self.rq.in_rset(url):
                    self.rq.add_to_rset(url)
                    self.rq.pull_to_rlist(url)

    def init(self):
        """Drain the Redis queue with small batches of gevent workers."""
        count = 6
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1  # queue nearly empty: fall back to one worker
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Start the consumer thread, then page through the list endpoint."""
        threading.Thread(target=self.init).start()
        task_li = [
            # {'all_page': 2521},  # full-site backfill page count
            {'all_page': 5},
        ]
        for task in task_li:
            for page in range(1, task['all_page'] + 1):
                params = (
                    ('currentPage', str(page)),
                    ('begindate', ''),
                    ('enddate', ''),
                    ('title', ''),
                    ('bid_type', ''),
                    ('proj_number', ''),
                    ('zone', ''),
                )
                self.load_get(params)
                print('第{}页'.format(page))
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """云南公共资源交易信息网 crawler (www.ynggzyxx.gov.cn).

    Posts paged queries per announcement category, de-duplicates detail URLs
    through Redis, scrapes each detail page and stores the record in MongoDB.
    """

    def __init__(self):
        name = 'yunnan_ynggzyxx_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Origin': 'https://www.ynggzyxx.gov.cn',
            'Upgrade-Insecure-Requests': '1',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'https://www.ynggzyxx.gov.cn/res/css/basic.css',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh,zh-CN;q=0.9',
        }
        self.rq = Rdis_Queue(host='localhost', dblist='yunnan_list1',
                             dbset='yunnan_set1')

    def is_running(self):
        """Return False once the URL queue is drained but the seen-set is non-empty."""
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        return True

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the MongoDB ``_id``."""
        m = hashlib.md5()
        m.update(sign_str.encode('utf-8'))
        return m.hexdigest()

    def now_time(self):
        """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record, then refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort 'province-city' extraction from *strs*.

        Falls back to the default province *pro* when nothing is recognised;
        returns None when ``transform`` itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            pass  # odd titles can break transform(); caller tolerates None
        else:
            area_li = [pro] if area_str == '' else area_str.split('-')
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            return area_li[0]

    def load_get_html(self, url):
        """Fetch one announcement detail page, parse it and store the record."""
        if url is None:
            return
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get_html error:{}'.format(e))
            print(url)
        else:
            title = selector.xpath('//h3[@class="detail_t"]/text()')
            # BUG FIX: was `if title != '':` — an xpath result is a list, so
            # the test was always true and an empty result crashed with
            # IndexError at title[0], silently losing the URL.
            if title:
                title = re.sub(r'\r|\n|\s', '', title[0])
                try:
                    # NOTE(review): this pattern is a character class, not an
                    # alternation of words; kept as-is so stored status values
                    # do not change.
                    status = re.search(
                        r'["招标","预","采购","更正","结果","补充"]{1,2}公告$',
                        title).group()
                except Exception:
                    status = '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath('//p[@class="kdg"]//text()')
            if publish_date != []:
                # Accept both '20180101' and '2018-1-1' style dates.
                publish_date = re.search(r'(\d{8}|\d{4}\-\d+\-\d{1,2})',
                                         ''.join(publish_date)).group()
                if '-' not in publish_date:
                    publish_date = '{}-{}-{}'.format(publish_date[0:4],
                                                     publish_date[4:6],
                                                     publish_date[6:8])
            else:
                publish_date = None
            area_name = self.get_area('云南', title)
            source = 'https://www.ynggzyxx.gov.cn/'
            table_ele = selector.xpath('//div[@class="page_contect bai_bg"]')
            if table_ele != []:
                table_ele = table_ele[0]
            else:
                return  # page layout unknown: skip rather than store junk
            content_html = etree.tostring(table_ele, encoding="utf-8",
                                          pretty_print=True,
                                          method="html").decode('utf-8')
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '云南省公共资源交易网'
            retult_dict['en_name'] = 'Yunnan Province Public resource'
            self.save_to_mongo(retult_dict)

    def load_get(self, categoryId, types, page):
        """POST one list-page query for category *types* and enqueue the hits."""
        try:
            data = [
                ('currentPage', str(page)),
                ('area', '000'),
                ('industriesTypeCode', ''),
                ('scrollValue', categoryId),
                ('purchaseProjectCode', ''),
                ('bulletinTitle', ''),
                ('secondArea', ''),
            ]
            url = 'https://www.ynggzyxx.gov.cn/jyxx/{}'.format(types)
            response = requests.post(
                url=url, headers=self.headers,
                data=data).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get error:{}'.format(e))
            # NOTE(review): unconditional retry — a permanently failing page
            # recurses forever; consider a bounded retry count.
            self.load_get(categoryId, types, page)
        else:
            print('第{}页'.format(page))
            url_li = selector.xpath('//*[@id="data_tab"]/tbody/tr/td/a/@href')
            for url in url_li:
                urls = 'https://www.ynggzyxx.gov.cn' + url
                if not self.rq.in_rset(urls):
                    self.rq.add_to_rset(urls)
                    self.rq.pull_to_rlist(urls)

    def init(self):
        """Drain the Redis queue with small batches of gevent workers."""
        count = 6
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Start the consumer thread, then fan out list queries per category."""
        threading.Thread(target=self.init).start()
        # Full-backfill page counts for reference: jsgcZbgg 2538,
        # jsgcBgtz 940, jsgcZbjggs 3417, jsgcpbjggs 917, zfcg/cggg 2522,
        # zfcg/gzsx 646, zfcg/zbjggs 2033, zfcg/zfcgYcgg 227.
        task_li = [
            {'categoryId': '1002', 'types': 'jsgcZbgg', 'all_page': 2},
            {'categoryId': '1000', 'types': 'jsgcBgtz', 'all_page': 2},
            {'categoryId': '842', 'types': 'jsgcZbjggs', 'all_page': 2},
            {'categoryId': '942', 'types': 'jsgcpbjggs', 'all_page': 2},
            {'categoryId': '825', 'types': 'zfcg/cggg', 'all_page': 1},
            {'categoryId': '626', 'types': 'zfcg/gzsx', 'all_page': 1},
            {'categoryId': '843', 'types': 'zfcg/zbjggs', 'all_page': 2},
            {'categoryId': '963', 'types': 'zfcg/zfcgYcgg', 'all_page': 1},
        ]
        count = 3
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    categoryId = task['categoryId']
                    types = task['types']
                    spawns = [
                        gevent.spawn(self.load_get, categoryId, types,
                                     page + i) for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """海口政府采购网 crawler (ggzy.haikou.gov.cn).

    Posts paged JSON queries per announcement type, builds encoded detail
    URLs via ``EncodeStr``, queues them through Redis, then scrapes and
    stores each detail page in MongoDB.
    """

    def __init__(self):
        name = 'haikou_ggzy_haikou_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Origin': 'http://ggzy.haikou.gov.cn',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            # BUG FIX: the original Referer contained '^¶m=' — HTML-entity
            # mojibake of '&param=' ('&para;'→'¶') plus a cmd copy '^' escape.
            'Referer': 'http://ggzy.haikou.gov.cn/login.do?method=newsecond&param=431241696e6465783d3326747970653d5a435f4a59',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive',
        }
        self.session = requests.session()
        self.rq = Rdis_Queue(host='localhost', dblist='haikou_list1',
                             dbset='haikou_set1')

    def is_running(self):
        """Return False once the URL queue is drained but the seen-set is non-empty."""
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        return True

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the MongoDB ``_id``."""
        m = hashlib.md5()
        m.update(sign_str.encode('utf-8'))
        return m.hexdigest()

    def now_time(self):
        """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record, then refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort 'province-city' extraction from *strs*.

        Falls back to the default province *pro* when nothing is recognised;
        returns None when ``transform`` itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            pass  # odd titles can break transform(); caller tolerates None
        else:
            area_li = [pro] if area_str == '' else area_str.split('-')
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            return area_li[0]

    def load_get_html(self, url):
        """Fetch one announcement detail page, parse it and store the record."""
        if url is None:
            return
        try:
            response = requests.get(url=url, headers=self.headers).text
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get_html error:{}'.format(e))
        else:
            title = selector.xpath('//div[@class="part_1"]/div[1]/text()')
            if title != []:
                title = title[0]
                try:
                    status = re.search(r'[\u4e00-\u9fa5]{2}公告$', title).group()
                except Exception:
                    status = '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath(
                '//div[@class="part_1"]/div[2]//text()')
            if publish_date != []:
                publish_date = re.search(r'(\d{4}\-\d+\-\d+)',
                                         ''.join(publish_date)).group()
            else:
                publish_date = None
            soup = BeautifulSoup(response)
            content_html = soup.find(class_='content_wrap')
            area_name = self.get_area('海口', title)
            source = 'http://ggzy.haikou.gov.cn'
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '海口公共资源交易网'
            # NOTE(review): 'Hiakou' is a typo for 'Haikou', kept byte-for-byte
            # in case downstream consumers key on the stored value.
            retult_dict['en_name'] = 'Hiakou Public resource'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, data):
        """POST one list query and enqueue the encoded detail URL per row."""
        try:
            params = (('method', 'getSecondTableInfo'), )
            url = 'http://ggzy.haikou.gov.cn/login.do'
            response = requests.post(url=url, headers=self.headers,
                                     params=params, data=data).json()
        except Exception:
            # BUG FIX: message used to say 'load_post error' in this method.
            print('load_get error')
        else:
            response_li = response['result']
            for dic in response_li:
                key_str = ('flag=3&name=' + dic['FLAG'] + '&key=' +
                           dic['KEYID'])
                es = EncodeStr(key_str)
                encodestr = es.encodes()
                # BUG FIX: original URL contained '¶m=' (mojibake of
                # '&param='), so the query string lost its parameter key.
                urls = ('http://ggzy.haikou.gov.cn/login.do?method=newDetail'
                        '&param=' + encodestr)
                if not self.rq.in_rset(urls):
                    self.rq.add_to_rset(urls)
                    self.rq.pull_to_rlist(urls)

    def init(self):
        """Drain the Redis queue with small batches of gevent workers."""
        count = 6
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Start the consumer thread, then page through each announcement type."""
        threading.Thread(target=self.init).start()
        flag = 3  # pages fetched per announcement type
        task_li = [
            {'type': 'GC_JY', 'all_page': flag},
            {'type': 'GC_GS', 'all_page': flag},
            {'type': 'GC_JG', 'all_page': flag},
            {'type': 'ZC_JY', 'all_page': flag},
            {'type': 'ZC_JG', 'all_page': flag},
        ]
        count = 1
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                data = [
                    ('currentPage', str(page)),
                    ('pageSize', '20'),
                    ('flag', '3'),
                    ('type', task['type']),
                    ('notice_title', ''),
                ]
                try:
                    self.load_get(data)
                    print('第{}页'.format(page))
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """南京公共资源交易信息网 crawler (ggzy.njzwfw.gov.cn).

    Scrapes paged list endpoints per category, extracts detail URLs from
    inline ``window.open(...)`` calls, queues them through Redis, then
    scrapes and stores each detail page in MongoDB.
    """

    def __init__(self):
        name = 'nanjing_ggzy_njzwfw_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'text/html, */*; q=0.01',
            'Referer': 'http://ggzy.njzwfw.gov.cn/njweb/gycq/stateProperty.html',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive',
        }
        self.rq = Rdis_Queue(host='localhost',
                             dblist='nanjing_ggzy_njzwfw_gov_cn_list1',
                             dbset='nanjing_ggzy_njzwfw_gov_cn_set1')

    def is_running(self):
        """Return False once the URL queue is drained but the seen-set is non-empty."""
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        return True

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the MongoDB ``_id``."""
        m = hashlib.md5()
        m.update(sign_str.encode('utf-8'))
        return m.hexdigest()

    def now_time(self):
        """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record, then refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort 'province-city' extraction from *strs*.

        Falls back to the default province *pro* when nothing is recognised;
        returns None when ``transform`` itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            pass  # odd titles can break transform(); caller tolerates None
        else:
            area_li = [pro] if area_str == '' else area_str.split('-')
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            return area_li[0]

    def load_get_html(self, url):
        """Fetch one announcement detail page, parse it and store the record."""
        if url is None:
            return
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get_html error:{}'.format(e))
        else:
            title = selector.xpath('//div[@class="article-info"]/h1/text()')
            if title != []:
                title = re.sub(r'\r|\n|\s', '', ''.join(title))
                try:
                    # NOTE(review): character class, not word alternation;
                    # kept as-is to preserve stored status values.
                    status = re.search(
                        r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$',
                        title).group()
                except Exception:
                    status = '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath('//p[@class="info-sources"]//text()')
            if publish_date != []:
                publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',
                                         ''.join(publish_date)).group()
            else:
                publish_date = None
            print(publish_date, title)
            area_name = '江苏-南京'  # site is city-scoped; no per-record lookup
            source = 'http://ggzy.njzwfw.gov.cn/'
            table_ele = selector.xpath('//div[@class="ewb-main"]')
            if table_ele != []:
                table_ele = table_ele[0]
            else:
                return  # page layout unknown: skip rather than store junk
            content_html = etree.tostring(table_ele, encoding="utf-8",
                                          pretty_print=True,
                                          method="html").decode('utf-8')
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '南京市公共资源交易平台'
            retult_dict['en_name'] = 'Nanjing City Public resource'
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, categoryId, types, page):
        """Fetch one list page and enqueue every unseen detail URL."""
        try:
            params = (('_', categoryId), )
            url = 'http://ggzy.njzwfw.gov.cn/njweb/{}/{}.html'.format(
                types, page)
            response = requests.get(url=url, headers=self.headers,
                                    params=params).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get error:{}'.format(e))
        else:
            print('第{}页'.format(page))
            # Detail links live in inline onclick="window.open('...')" calls.
            # BUG FIX: the dot in window.open was unescaped; tightened.
            url_li = re.findall(r"window\.open\(\'(.*?)\'\)", response)
            for url in url_li:
                urls = 'http://ggzy.njzwfw.gov.cn' + url
                if not self.rq.in_rset(urls):
                    self.rq.add_to_rset(urls)
                    self.rq.pull_to_rlist(urls)

    def init(self):
        """Drain the Redis queue with small batches of gevent workers."""
        count = 2
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Start the consumer thread, then fan out list queries per category."""
        threading.Thread(target=self.init).start()
        task_li = [
            {'categoryId': '69171', 'types': 'zfcg/067001/067001001', 'all_page': 2},
            {'categoryId': '27720', 'types': 'zfcg/067002/067002001', 'all_page': 2},
            {'categoryId': '344', 'types': 'fjsz/068001/068001001', 'all_page': 2},
            {'categoryId': '21869', 'types': 'fjsz/068002/068002001', 'all_page': 3},
            {'categoryId': '48706', 'types': 'fjsz/068003/068003001', 'all_page': 2},
            {'categoryId': '95248', 'types': 'fjsz/068005/068005002', 'all_page': 3},
            {'categoryId': '74362', 'types': 'gchw/070001', 'all_page': 1},
            {'categoryId': '83799', 'types': 'gchw/070003', 'all_page': 1},
            {'categoryId': '81835', 'types': 'gchw/070004', 'all_page': 1},
            {'categoryId': '4620', 'types': 'jtsw/069001/069001001', 'all_page': 1},
            {'categoryId': '11321', 'types': 'jtsw/069003', 'all_page': 1},
        ]
        count = 2
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    categoryId = task['categoryId']
                    types = task['types']
                    spawns = [
                        gevent.spawn(self.load_get, categoryId, types,
                                     page + i) for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 10:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """济南公共资源交易信息网 crawler (jnggzy.jinan.gov.cn).

    Posts paged JSON search queries per announcement category, extracts
    notice ids from onclick handlers (or plain hrefs), queues detail URLs
    through Redis, then scrapes and stores each detail page in MongoDB.
    """

    def __init__(self):
        name = 'jinan_jngp_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.headers = {
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'Connection': 'keep-alive',
            'Host': 'jnggzy.jinan.gov.cn',
            # NOTE(review): the embedded spaces in Origin/Referer look like a
            # paste artifact; kept byte-for-byte since the server evidently
            # accepted them — confirm and clean up if so.
            'Origin': 'http: // jnggzy.jinan.gov.cn',
            'Referer': 'http: // jnggzy.jinan.gov.cn / jnggzyztb / front / noticelist.do?type = 1 & xuanxiang = 1 & area =',
        }
        self.rq = Rdis_Queue(host='localhost',
                             dblist='jinan_jngp_gov_cn_list1',
                             dbset='jinan_jngp_gov_cn_set1')

    def is_running(self):
        """Return False once the URL queue is drained but the seen-set is non-empty."""
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        return True

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the MongoDB ``_id``."""
        m = hashlib.md5()
        m.update(sign_str.encode('utf-8'))
        return m.hexdigest()

    def now_time(self):
        """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record, then refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort 'province-city' extraction from *strs*.

        Falls back to the default province *pro* when nothing is recognised;
        returns None when ``transform`` itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            pass  # odd titles can break transform(); caller tolerates None
        else:
            area_li = [pro] if area_str == '' else area_str.split('-')
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            return area_li[0]

    def load_get_html(self, url):
        """Fetch one announcement detail page, parse it and store the record."""
        if url is None:
            return
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('load_get_html error:{}'.format(e))
        else:
            title = selector.xpath('//div[@class="list"]/h1//text()')
            if title != []:
                title = re.sub(r'\r|\n|\s', '', ''.join(title))
                try:
                    # NOTE(review): character class, not word alternation;
                    # kept as-is to preserve stored status values.
                    status = re.search(
                        r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$',
                        title).group()
                except Exception:
                    status = '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath(
                '//div[@class="list"]/div/span//text()')
            if publish_date != []:
                publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',
                                         ''.join(publish_date)).group()
            else:
                publish_date = None
            area_name = '山东-济南'  # site is city-scoped; no per-record lookup
            source = 'http://jnggzy.jinan.gov.cn/'
            table_ele = selector.xpath('//div/div[@class="list"]')
            if table_ele != []:
                table_ele = table_ele[0]
            else:
                return  # page layout unknown: skip rather than store junk
            content_html = etree.tostring(table_ele, encoding="utf-8",
                                          pretty_print=True,
                                          method="html").decode('utf-8')
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '济南公共资源交易中心'
            retult_dict['en_name'] = 'Jinan Public resource'
            self.save_to_mongo(retult_dict)

    def load_get(self, categoryId, types, page):
        """POST one search query and enqueue detail URLs from the result HTML."""
        try:
            params = {
                'area': '',
                'type': types,
                'xuanxiang': categoryId,
                'subheading': '',
                'pagenum': page,
            }
            url = 'http://jnggzy.jinan.gov.cn/jnggzyztb/front/search.do'
            response = requests.post(url=url, headers=self.headers,
                                     data=params).json()
            response_str = response['params']['str']
            selector = etree.HTML(response_str)
        except Exception as e:
            print('load_get error:{}'.format(e))
        else:
            print(response)
            print('第{}页'.format(page))
            id_li = selector.xpath('//ul/li/a/@onclick')
            if len(id_li) > 0:
                # onclick='showNotice(12345)' → keep just the numeric id.
                iid_li = [re.sub(r'.*?\(|\).*', '', i) for i in id_li]
                for iid in iid_li:
                    url = ('http://jnggzy.jinan.gov.cn/jnggzyztb/front/'
                           'showNotice.do?iid={}&xuanxiang={}'.format(
                               iid, categoryId))
                    if not self.rq.in_rset(url):
                        self.rq.add_to_rset(url)
                        self.rq.pull_to_rlist(url)
            else:
                url_li = selector.xpath('//ul/li/a/@href')
                for url in url_li:
                    urls = 'http://jnggzy.jinan.gov.cn' + url
                    if not self.rq.in_rset(urls):
                        self.rq.add_to_rset(urls)
                        self.rq.pull_to_rlist(urls)

    def init(self):
        """Drain the Redis queue with small batches of gevent workers."""
        count = 2
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Start the consumer thread, then fan out search queries per category."""
        threading.Thread(target=self.init).start()
        task_li = [
            {'categoryId': '招标公告', 'types': '1', 'all_page': 4},
            {'categoryId': '中标公示', 'types': '1', 'all_page': 4},
            {'categoryId': '变更公告', 'types': '1', 'all_page': 4},
            {'categoryId': '废标公告', 'types': '1', 'all_page': 4},
        ]
        count = 2
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    categoryId = task['categoryId']
                    types = task['types']
                    spawns = [
                        gevent.spawn(self.load_get, categoryId, types,
                                     page + i) for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    """河南政府采购网 crawler (www.hngp.gov.cn).

    Pages through the search endpoint, follows each hit to a wrapper page,
    extracts the real ``.htm`` content URL from it, and stores one record
    per announcement in MongoDB (de-duplicated through Redis).
    """

    def __init__(self):
        name = 'henan_hngp_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        self.cookies = {
            'sId': '7c61a3bff6dc4969a336157b5f3dfb1d',
        }
        self.headers = {
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'http://www.hngp.gov.cn/henan/search?appCode=H60&pageSize=16&keyword=&dljg=&cgr=&year=2015&pageNo=15',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
        }
        # NOTE(review): the session is configured but the methods below call
        # requests.get directly — confirm whether session reuse was intended.
        self.session = requests.session()
        self.session.headers.update(self.headers)
        self.session.cookies.update(self.cookies)
        self.rq = Rdis_Queue(host='localhost', dblist='henan_list1',
                             dbset='henan_set1')

    def is_running(self):
        """Return False once the URL queue is drained but the seen-set is non-empty."""
        if self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0:
            return False
        return True

    def hash_to_md5(self, sign_str):
        """Hex MD5 digest of *sign_str*; used as the MongoDB ``_id``."""
        m = hashlib.md5()
        m.update(sign_str.encode('utf-8'))
        return m.hexdigest()

    def now_time(self):
        """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        """Persist one record, then refresh the running flag."""
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        """Best-effort 'province-city' extraction from *strs*.

        Falls back to the default province *pro* when nothing is recognised;
        returns None when ``transform`` itself raises.
        """
        location_str = [strs]
        try:
            df = transform(location_str, umap={})
            area_str = re.sub(
                r'省|市', '-',
                re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            pass  # odd titles can break transform(); caller tolerates None
        else:
            area_li = [pro] if area_str == '' else area_str.split('-')
            if len(area_li) >= 2 and area_li[1] != '':
                return '-'.join(area_li[:2])
            return area_li[0]

    def load_get_html(self, url):
        """Follow a wrapper page to its .htm content and store the record."""
        if url is None:
            return
        try:
            response = requests.get(
                url=url, headers=self.headers).content.decode('utf-8')
            # The wrapper page loads the real content via get("....htm").
            urls_li = re.findall(r'get\(\"(.*?\.htm)\"', response)
            if len(urls_li) < 1:
                return
            urls = 'http://www.hngp.gov.cn' + urls_li[0]
            response1 = requests.get(
                url=urls, headers=self.headers).content.decode('utf-8')
            # NOTE(review): title/date are parsed from the wrapper page
            # (response), while the stored body comes from the .htm page
            # (response1) — presumably intentional; confirm.
            selector = etree.HTML(response)
        except Exception as e:
            # BUG FIX: the original 'laod_get_html error:'.format(e) had no
            # {} placeholder, so the exception text was never printed.
            print('load_get_html error:{}'.format(e))
        else:
            title = selector.xpath(
                '//*[@id="ng-app"]/body/div[3]/div[1]/h1/text()')
            if title != []:
                title = title[0]
                try:
                    status = re.search(r'[\u4e00-\u9fa5]{2}公告$', title).group()
                except Exception:
                    status = '公告'
            else:
                title = None
                status = '公告'
            _id = self.hash_to_md5(url)
            publish_date = selector.xpath(
                '//*[@id="ng-app"]/body/div[3]/div[1]/div[1]/span//text()')
            if publish_date != []:
                publish_date = re.search(r'(\d{4}\-\d+\-\d+)',
                                         ''.join(publish_date)).group()
            else:
                publish_date = None
            content_html = response1.lower()
            if len(content_html) < 100:
                return  # near-empty body: treat as a dead page
            area_name = self.get_area('河南', title)
            source = 'http://www.hngp.gov.cn'
            retult_dict = dict()
            retult_dict['_id'] = _id
            retult_dict['title'] = title
            retult_dict['status'] = status
            retult_dict['area_name'] = area_name
            retult_dict['source'] = source
            retult_dict['publish_date'] = publish_date
            retult_dict['detail_url'] = url
            retult_dict['content_html'] = str(content_html)
            retult_dict['create_time'] = self.now_time()
            retult_dict['zh_name'] = '河南省政府采购网 '  # trailing space preserved
            retult_dict['en_name'] = 'Henan Province Government Procurement'
            print(retult_dict)
            print('列表长度为={}'.format(self.rq.r_len()))
            self.save_to_mongo(retult_dict)

    def load_get(self, page):
        """Fetch one search-result page and process/enqueue every hit."""
        try:
            params = {
                'appCode': 'H60',
                'pageSize': 10,
                'keyword': '',
                'dljg': '',
                'cgr': '',
                'year': '2019',
                'pageNo': page,
            }
            url = 'http://www.hngp.gov.cn/henan/search'
            response = requests.get(url=url, headers=self.headers,
                                    params=params, cookies=self.cookies)
            selector = etree.HTML(response.content.decode('utf-8'))
            url_li = selector.xpath('//div[@class="List2"]/ul/li/a/@href')
            # Keep the server happy: next request refers to this result page.
            self.headers['Referer'] = response.url
        except Exception:
            # BUG FIX: message used to say 'load_post error' in this method.
            print('load_get error')
        else:
            print('第{}页'.format(page))
            for url in url_li:
                url = 'http://www.hngp.gov.cn' + url
                # NOTE(review): this direct call scrapes the page immediately
                # AND the URL is queued below, so each hit may be processed
                # twice (Mongo de-dupes on _id) — confirm if intentional.
                self.load_get_html(url)
                if not self.rq.in_rset(url):
                    self.rq.add_to_rset(url)
                    self.rq.pull_to_rlist(url)

    def init(self):
        """Drain the Redis queue with small batches of gevent workers."""
        count = 5
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1
            try:
                spawns = [
                    gevent.spawn(self.load_get_html, self.rq.get_to_rlist())
                    for i in range(count)
                ]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        """Start the consumer thread, then page through the search endpoint."""
        threading.Thread(target=self.init).start()
        task_li = [
            # {'all_page': 500},  # full backfill
            {'all_page': 3},
        ]
        count = 2
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    spawns = [
                        gevent.spawn(self.load_get, page + i)
                        for i in range(count)
                    ]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object): '''南昌公共资源交易信息网''' def __init__(self): name = 'nanchang_ncztb_nc_gov_cn' self.coll = StorageSetting(name) self.collection = self.coll.find_collection self.cookies = { 'ASP.NET_SessionId': 'kxgkxo45v04bzs55ie3tib55', '__CSRFCOOKIE': 'ad60f543-41c8-481d-b0cf-accadc73c516', } self.headers = { 'Connection': 'keep-alive', 'Cache-Control': 'max-age=0', 'Origin': 'http://ncztb.nc.gov.cn', 'Upgrade-Insecure-Requests': '1', 'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://ncztb.nc.gov.cn/nczbw/jyxx/002001/002001002/MoreInfo.aspx?CategoryNum=002001002', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh,zh-CN;q=0.9', } # self.session = requests.session() # pq = ProxyQueue() # self.pq_run = pq.run() # self.proxy_queue = pq.proxy_queue self.rq = Rdis_Queue(host='localhost', dblist='nanchang_ncztb_nc_gov_cn_list1', dbset='nanchang_ncztb_nc_gov_cn_set1') def is_running(self): is_runing = True if self.rq.r_len() == 0 and len (self.rq.rset_info()) > 0: return False else: return is_runing def hash_to_md5(self, sign_str): m = hashlib.md5() sign_str = sign_str.encode('utf-8') m.update(sign_str) sign = m.hexdigest() return sign def now_time(self): time_stamp = datetime.datetime.now() return time_stamp.strftime('%Y-%m-%d %H:%M:%S') def save_to_mongo(self,result_dic): self.coll.saves(result_dic) self.is_running() def get_area(self,pro, strs): location_str = [strs] try: df = transform(location_str, umap={}) area_str = re.sub(r'省|市', '-', re.sub(r'省市区0', '', re.sub(r'/r|/n|\s', '', str(df)))) except: pass else: if area_str == '': area_li = [pro] else: area_li = (area_str.split('-')) if len(area_li) >=2 and area_li[1] !='': return '-'.join(area_li[:2]) else: return area_li[0] def load_get_html(self, url): if url == 
None: return try: # selector_div = etree.HTML(str(div)) response = requests.get(url=url, headers=self.headers).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('laod_get_html error:{}'.format(e)) # print(url) # self.load_get_html(url) else: # print(url) title = selector.xpath('//td[@id="tdTitle"]/font//text()') if title != []: title = re.sub(r'\r|\n|\s','',''.join(title)) try: status = re.search(r'["招标","中标","预","采购","更正","结果","补充","询价"]{1,2}公告$', title).group() except: status = '公告' else: title = None status = '公告' _id = self.hash_to_md5(url) publish_date = selector.xpath('//td[@id="tdTitle"]/font[2]//text()') if publish_date != []: # publish_date = re.search(r'(\d{4}\-\d+\-\d{1,2})',''.join(publish_date)).group() publish_date = re.sub(r'\/','-',re.search(r'(\d{8}|\d{4}\/\d+\/\d{1,2})',''.join(publish_date)).group()) # if '-' not in publish_date: # publish_date = '{}-{}-{}'.format(publish_date[0:4],publish_date[4:6], publish_date[6:8]) else: publish_date = None # print(publish_date, title) # area_name = self.get_area('福建', title) area_name = '江西-南昌' # print(area_name) source = 'http://ncztb.nc.gov.cn' table_ele = selector.xpath('//table[@id="tblInfo"]') if table_ele != []: table_ele = table_ele[0] else: return content_html = etree.tostring(table_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') retult_dict = dict() retult_dict['_id'] = _id retult_dict['title'] = title retult_dict['status'] = status retult_dict['area_name'] = area_name retult_dict['source'] = source retult_dict['publish_date'] = publish_date retult_dict['detail_url'] = url retult_dict['content_html'] = str(content_html) retult_dict['create_time'] = self.now_time() retult_dict['zh_name'] = '江西省南昌公共资源交易网' retult_dict['en_name'] = 'Nanchang Public resource' # print(retult_dict) print('列表长度为={}'.format(self.rq.r_len())) self.save_to_mongo(retult_dict) def load_get(self,categoryId, types, page): try: params = ( ('CategoryNum', types), ) data = { 
'__CSRFTOKEN': '/wEFJGFkNjBmNTQzLTQxYzgtNDgxZC1iMGNmLWFjY2FkYzczYzUxNg==', '__VIEWSTATE': 'QnAQyc3/r6VenaNsD3N9NizbNQGw+P+E6kFiFSJ13jzqjGk0g2kwB5V0Kntc9wsMi2BWrroLA5x2G7wZzFXp9cYRvyfq+9DWkWifNlNexQajTfAAwwxwTYEs1j2HqAFRKPzmOEuCPkPncRO+t3YsexHUWv6gM1MuoFbn51QT+ewUuY/8FZBMImfUxklIPGmWLmfiMQJiQrlnC7z3sF5RmmWuoP5MQDORXRoqwhxWkWJjiI9YJaACijKj/p2k4l/g+m1C5h5MENVN5NBUH6XT0JKcX6U3x6heix6GMubJWfhEXa+naDfzt8sphrsVQqfLxndHZ+5y/L142pUh8NbguhSvJPTSIamouvOuXA40FfyWO480dj5KxhiBDh/gvHVJoEfBGtQsxkguGail3QJ2MQnH3rtczO2koJZMeIQ4v3Od+5aHkTYSbdm8XALJfrZCtiHQV9IwMoPTW8KMBOb2nib0tGvUYfP5h+RT/wVF0z074bLI3JlOdTxLthYBxyHXIFZCbiOawjKyKe9Vn6cMrcuVVICO/YOxA+nNHvd8yJQ/qQ8aUk1KbOAG1wAWLk1HYxVwg3n+8fsL8YvMvsJdYTVcAPnLyaIlKPWMrOEM9KcQa5/2PgXnbYBgPOO30mSN8qLO2tbwvZbUOlVKWeDw5eaQMkmDQpRO+ovhhIgHDdEcYUp47n5aFatq0uvJEd7/uICRGoE8lz2pc0ln1OhqtJlyKzpsA+zdBLmiQMGLJBzmGr6aGxd+zinnMApyqM7d8Zt9Ie1QRg3GX0a2ikdXyvsAi1UUaqDFaB13fB5mdKyrDexEUlHXD0HvhlRW0YD0m3hWgauV1OiEnj3w3Ju0j2M1tlTAqGPDUVFtkoFRAs61gXyihiNt2tnJO6S8XcFeZP6rPxH3YWeEdLMW2TfJDhOLNKeiSB37/sV5vUX+bNEr0z2j84SZONGpBcXrLq7WF9KKT8eAZuD4ATW1gN+BaCN2hkuBv9tpiEZoJTCqaYFxiWbRseiEe1GHAX7fohZbDkiwsASGCPtiahtWXxOOBPqhttRnpv+aguP8OdNNmdmQfL0MQ+XDPN7W4sxAqxdHoGf0lzUu8KYfetXfK9oWccJ6nWG38U/gKAnibKAIou8y0qR6mduD1v7UVXC3LSjNHSTPkxvbsLNJByq5zAeBa4HWHv046/fnz56p3ViWxV/rFiCdmHFfL72kH/V2zSNOCr1ec6J23vdZavplR4r68OF6zFV9Y8uFEPmtC4klHe64n+aqOtJs1vmA0XRCzGJ08AwfdNjSbGGpv1i0HUqEyQG2NCvEtfIHNCQcvCxU9rb2n920DGBN9aVF2LlF8VIVzDQq7Qizh0jtOQ934qHVYKjooasIs5etbXZqpxdNs0NQeZt5GbA9ysXkVqNUv1ca4lgVPy/smJc3YM64zPCHpcx6TucHMik26ksSTmaY27uJDgn9ihU1JeUMW+MewDVHekqsYprbIDnzYzpHTSZmA3JjBY5y+rlpES0FIROzzj9Ng8zCiiLyccFCCAEZf8RrcHiGUU7j7c/ljgh6b9PrFDQDAWcz3Vip5nGI6+Na4N7QmpawwncKFKUFfvYnmyzaGD3R9eCx5yKo6oIbo3M/4iIZxdDzF09CHZQd3WnUhNYcgP31ScXNzev2vY4LiBZidS8dbNhcm3ZMs4DeweLcDWthEWn2IVhqMZALreAbmx6cu3IAPrDBNm1e6kbguNW1UZI+6ZLK9YTKe+nnTN6b+4xk4gBbjXY8MoKnvcCPpw+M/jt7Ohqx8JIFaw4mEq3M5FY/FpRz1hQt9KkNfBvCKBIMZYJAq7kxJZlg03klgju2s1EniNP5UU6lTR7qMBeRCbdbvR5+G6nFzgM+G+YKNPiCFty0m5ab41lp9VkxXJ6qvyqCKfdZ7
a1Hz8PReKqih6NZbP3dI6Ls3sV2JX30gq6u2XsjsB/wIbpv9/Lz6RIZedQUagmirtMD1ZfKWoCQOAyI/KBsSbLcOxHkgaMpIC3L68MNfY3JMJy2lSZfkwME0m4tKG0midI66EkcyZFywWoMnU3GxZtX9oYQXcA8sTW3O8c2xzEgE1N4s+fBluMMr/cSK+IyWswXFxu2yHete80frdq7tA/D10U9IBsG00pOqkh/z19FkjyrtvEmh9RbMitaSdiUMMMRzMDSUxMVlSAE61SxJpcaCrGi37Em10w1G05Pd8SKnVFKfb28B/enOUT/wAcQGyHy3IKlf4j+T4L298kYq9dGJjIxC3KmNZflc3y7gVeR1kOvOvnL5pyycRoFEGYqbsx3GFp7F4kLCBXHkFfrOzWFyrmE14yCeyCgGSlGmEazGtYQfoFeEr8lAD8HLOcss/Skwsrv8sglS4RaaSAGaxkPFHAOYpfAtNcus05Kj0z9vzPQmoAG0K1N8UJ4FwXMj7zvf/+yq/KI9GyyXZOEll52Zjsf2ojFP10nZJ1F2p4or9BYNLhAah+SDOjQHdSi3DBNRcies9qJEU9w39mH+VwF344u+tibtehwNv6Hr+VIFW+GASRsXxnBx2+SxTv2g6mxx5Keg2l7DEglcnh72/XmTDy9KrSrFiCSiwLoukwlD1E8vEguf4kAqluumMrRWRjWZyaFAYANuUN49/YZ6h6n3c8sr5zS9ceiK7TTRrgq6Hnd4K/ivoeHImyHXuPg0/EJI74F6J1WQxHLwAkKo1KE5O2c1xMLJV0deIAwIi3jEXuYHaUF6PzNoTFIWpXGpEVdckUx+Y6YC1ty1H3Q97eWfw7am/G1GP4WiotMFT7Iri72ORNrv4U3/LEljoST8CYwlesHDIERDgavp3UHl4OXpoNX7rPwN8N/Vvm0bPX5nLmlBGDhOhyqs2e4g1cAQS7gWYh5KTbEmSi7yQM5fIHPnXD94amrIFxxX0qwTXowXZlz+pJwRqJ0l8fMLFucYgZznjsV0aZFbpBRXTNfhjwY7fI8miJv9cj/ZS3W5RgVEjZHOkxwjf0MjYlL7Pgo+zln8tk4BvmceBh3obTb8oX1D0fbQf735RsLREEBkZwPHY8nw3G4KBNBqY625GsAGkQdJHwZK7mxt1PedLc7tNpELjvIug+RkNJCOkVX9ndhvw/+CAKgmDGNwa2/O9+kR+D2FZQ/Bz74RVkfZVwauky6J9whzK+pmp7UW2qTzzz1P3XWkq4TDWR37loewkm3M6UKhgeXH5oI5f+YB5kbpHz9FwVxdjAo4s3gUprbDdaTiid422W0eWvSMmJ3Qmjztsqxpey4mjC0QSfxum1eODI6zljomjhtY0i12XJHeaDkXuBAm4NhWwc0R6EnMJYS1YgkEJTpjUEiRfnIZq77FNMr2M+l+MlbW5cv43zB4kNI6/DRJuw4eNw38KulkRHo9lyMoqV0404ZLC7MxiOfCeIBOa75UqzpHo35bHG5mPfe+JeF2CPMMFZotXQfOvQUS1CSSC29XC4UrLGEn6wu9HxSb8pXlCMx4XuPgLO1mRHmS0HN1Zba/LJvZEUe2wZkUkw7ubRkt33jX9FBtKU00z4d/l1ZLeVudwidqzU2zF5JOmee5qrvCyog7YSGJj+G/iUrZK35KFZzvfKwmLYBcEvJS5cMb8v5v7U62JIvvTFp2CK6NzMO1ojBa4140XWunFKriOSxaauZeCuZ7lcw0GYbu6oOSOQCgsUEaHN5HmhcRkV88iI2TbXi4PpTN4UKqrMnbU0oA6hjPrTfOBM2EbU7HEYpi+P7Sa36i9uzSila5BYC0n3Fd//ai88+QJq8e6mB8N4p2V+orF6pnqnYH/+YLsW8DrIXjPaxgRGfdjSdcGH+BRlOc6BdvDEBp0xCLlIgeMkaGW2jR9uyBAFkJcvhKPtbXYmFc1CMtHcO0yrLo2CnCp8bNRpp5UedCGz9EHycl1YIuT0KVwYvy
o/ngPHJPSzVk90j2GYFZiW3+LklW1RTvdOUFAOq03DlC/PFGw1piuxyeipPFy0onVxMLDovHIiKXtns5uN0Nrzv1UMpRpE26itdV4tRL2IvAjxcnn9eG7+tGlc1pO2NvbhJyfGTHfT1NxC+NZCzR6iOXRwII4apHZPyD+j7Hold/WFLS1hF9FRQ6vjAuxacT/Sb9p8kCgOW+O/rhaFx3LXj2WN/m4ur8wMSUlF3HpYPlIt2I4U5MNWtciFgnCegzNjLI98ja3pGjRcDo6xohcGkfnE648wF0mW5pmQLyFjdg7pAmDcQD3anZANklYVdmbpvvXv2ZbEWSkblFq7ExOhhLgilAn0bJbn810e0XPcy79zx+XIgc/i9qul3ATMeHzf54tXisCRtyml+fQvBW/O8+ofLw9470P2fALTAjyJSYFvPImWMOgDb49Td37OdceJjbfflldDly+XytCnhBNmYtrq1TZQrU3vxNiYXTwb4dJ7kDTET6h+Xwp+MmeoM1guIKTm+V5SAZVGij3io0Yrr8BDTZrt9OTitJg12CPCIx2JXdT8HqFigYHFpMUDhh2Xi/KKelqDHYbK+IjInQQCaJ8dmBi/jWwhgTdc65/y/Q3M3vQS6h82x6Y4gHlcxJy76jE/j3jAOJ9EIE1rqaQp5Dc+/Pc5g1pNALVSowQXN4f2IH1Ipu/evu3H7SjBBu202GuqVeY3xd1f5K8JKlgLk3zwMn354FUYfRB3Z8Vwp75JSInoy5W9e3yHnGfNnVUMYxb0T+hhBclbKPi1UtkoEU53QjMUiIeMLvYc1v9cElgiBmfUVDKkVTdwmiSU6miryV1mAVvMA7RshKPi74AT3kCnZZghMWgx2EJ8bZJaDGFNqukrx0sFz7+zrZEuTjwOWLi9HpLOX2dyqha7o5sndKHa7/nm4avXKESazFcHHQKUOxLjdVX8/VUZygXtDctLe50TnDUBySZ7P2DHkd7OW3TnV6icVLEVCmwMF7eMcyDC6R+TdzOgEFGOF8qv3EH1k0VEORnprnYeGUFoXkdc52MeL1NZDfkQ2VzHH1KSC18BmCmgaXkVU21nktA4Lr9nSXglHhY34ribfm/CY4LF8cc7clx5G17xRKLi0sjwfDu2Ra8VMVZCHjEJWljqHw5zMFkJDhNIRF40YJLr5yEg+88bglvScyvNZN3VCUtnIJk4Y5C7DM2GdDA68q+7BTfGoLOcwpGrAuWFesHhx6oXA2paZykfXNzZzUtyuOoHt3cws9L9jACYr9nQZGBWLVF1yUm4fWrfBljMPvRr6iaBzgRUHtbhUt2rICUJXcrrNKsqGhvLLqds+I+p5gSkaG678swB9Zfgr2ipluO4yBv/weSMzbiCswv6CRPaQdHek6mxxiEGCdqgzLhoiijQM9DVck9VC1Ox/eLwVoq//YAZcoFBZ8dgN2z8V6faesysLmomW6ZxGTc2t+1PPmVl23uzRZaoB+RPA2PD0DArK6BX+iYhSOTdXjslo8pebg44/nOGt69pkABVEBGfGadQcOHzpScZmh3A1LM9EeeRihCPC9gE7cn0qTncKOWz0Qs0lAN6TEx0+zrt6tgfT7DDNLj8GrGErfHNE3pKYDF4q9O2S9hVj8+5HF9zf55t86xqYRpbDVS+2lGJma73T3MrBBCSjyPVh1rsBA3v5W7Ca6dPC56WUaluUXyhMAPrmIoPMo2/AcAhHb54lbceEGG971sRpQYhV5kHYtJ0kUPtyegW64mNkhb0bi6MB2xNVe9mj6gBNxqX+h0e0p2CLKgQpsd+9PmFslJ7wrn/v7xfkGXM3YpKddzU2VL8as5bdMg5kFhv0kXUwtyrKOQM+WaFrccHQjU65tWeL6cGBLrfN1NYYd8bZR90vVXjR169ztMrEOy8ei/kzb/mlQOJoUgAPKFQXnCmdJdvnS+jyUxg4fO4IU1tqMXiKwmpLx6PIUrTrSOscUt4JSlWmM5+GQsGo1BB24Br909udaytL2
f5JuBaHwUhjxhwv4bjsmMb41quhcWgr7XO8ycvAhCQPCexYC2C6xNJO3U2JtLy66YiT+EvTEUfCbbukOqRjRKv3i5XdR6p1fdA0ubU/FIUvQD8RSQYai/iQaj3nve6C4jLdaGA5p5QE/Rkx6TSyJ64jGqe3kq5K4RcvFk8TZ8dDCxuXzbZG9zP+UKv1y85kV+4XBDjcaeOe2zYu95uyGcSb7a/wfnNtsDg5uYwGsFWRWgWJKS9kXXtUAots+bkNzp3JRm4+ljntec9HisKPFIdxDCWlqOfpaPwgCoPLwvs2F0ZI0TXG4dihmnI6Bwm00NWbe9GwOCje179Msl49rED41zLJwyk6J3E+W++yBAajlnnOpx63oNQ7Pl7a5kEdSXtD6tx0SKbyGy+0iQGt1KKYOYYfB2HShdWpX7Lf0PkiPnrAFM9Mso5e0lXkUftM5q5J/WmIgBLECmTQAkhr93jftlM1HPcJycgEJYdE30fMPXcGQYcOIlecuPxDul7IiVA1/l6qI3tMxYZMXaqvpyPzgdrElxGTcG62faQ4fgw+ZJjXXfogz/NHeJUa8F4avUlWMJ9Rm2mDcMFryXrTS6Ul2a+39W03FpHCYLAGjb6RxxDMrO38fpfVUb6I73WmH5MhcAo81KR0lxWo/Ue5z9bLVBr1/yz7ZpowRONNlcQQQ0pSkGNbbHb0d1f/dUIDKxT7JwHoOUqzps2HS068laQGB5fphFdGqyfs9ZB+w4n8Y2KtYFjmfRG1SkYeqP3EcWfZ/QS73dhq33H3ijqkz7oLSomEmMnav71QjSW7JO/Goes14+uvOz6Kp4EhCEPxfJVcEgeKQjZkt/0kUUs0h9HPciZ0bma/gGSBxnwK/ktSCSkRCLFoPwSuY8VoxFs79V9D/Q9PfTgpJZBHqy21JDS8CVIfuHRUPz9aXoQxCbn0BKUP305AQWxu9J/TNPIXERQ2mjt/1OfgbcgTE9HAphhAGzEOLntaCZRKKqJ6kXN5kJpcg+NBUlYDdta3GdhUhswokuvvJAX/TYE0VQgzY5bPAQwvzDrE83v6cA8qmxMCJwHx7jQFHmHerK/zZJEbFATZA7TG5beeDSnR2l+XL4OkjQ0CGudJXMxGtyAqmonLvlwTcMkHfpQ5AyVLzAAOOr5O284q9aZLFdZm8epvjJI0hk4M48AC51DwQM7WJzK2aas3zVypwigBdpHY+4jy1T3E0BKvQjZiO8smn2CyU8MpEOUQGfREvYY/ug1Az1olxpqBVB67NjsLrTmMdQ948iTHezJXxOIDFwUBb2TQz6w5jk6hkS0/KkLJgiCmGav2ShyHfILiNjOtSh6ggqqAFHksEGDD8MO+fdYHxaIDi3BWu/gW2rtqaBBTiVQqA/N3KhyUeALiOl+RDhVAideylHw+kh3n/O2k2o1hUhC3stHEHxcPLXmXoVna04LtYFfZZSTUliO4iNji8ZYgfmO1JP90E0L7HD4my4ci3H3XyiYExNMaFEGL3rRISHIzKSRUFAgYbeVBoc7JEGZr8GVGRFEpogve/JM5Z7TPLdLS4u88hP44xytxLHw2OzYf2eqCv50Ux2wb5mgCjRG0xrubj62FRbCRxLJLfF5H88chKxbFu3/Da9vzVQ77k9LCa3fHb6tkfv1RXTMemM/5aMaMb58TQlwNyAImmzex5rjTl2dMb1S8FerYTcIlIx9PVdaqxYmN5A3T0u7u53DJH32l8P5wfRH7x2hArECgzV+/HP4YUZBPhqOeWZRoF+tZjyrbQv+srU/GRx2z1IUa6/BXBcidpznDD7a1uOcjXyD7jmTJIguEXW7sGm2MqfWPhpF0nMvH6SBkR0P3nrpDMte/0hIMdH8k1EaXm9tVq0KMoGVutL+vf0dhg8vqjVpziHSxz6+JRWkHTvc7bigzYf1T78SClBUqrBewxWITkfmMTBzlI2v+xZCCopTPnqbMjze76zB7L9INdT52NsYFupfYZ/mnVcVtDmjAsysZS
WmZ7IjWSC1rL5IhcCXx5f1u5TS4z8asM4gkKI0khJVh+mGw01mUoXIDqlzNF+RjIK5M0/AZiS6S4K/BESTHUkVwkpmVdrmKUEcVeae3S9RFFy8axPLIFyiueKQxo4pRJmJa8hxgohQNEwGqSTYuCHDc0aCwZS2i2UQ+dTM/vex1bcvknYan6WLqh865Ijo15Hv1mT+YfjDi3kGjwUV2KdrUTQYQ/EQDRRLfofoxMML6vpAkVswa8cdbn0VQh2VI9F1NE7epGZdlw8+zQoNJXwEGIiaigSA8JmVpfN/a63PxbVhET7dyELiZxoO7GvUqI9BCM6lGitPkGl+/Di7w2DMFHR3+skcJRsFHn1HmFDrno6KhyE5yir5berCfOnA6aWGHHkL0qA4u25DjaGpv8SvYIb9WZaM3dBAx106tMDwyOeMcnb6INa1eRabadu0/Qo/8AHtjA5dky6gvLWnLMVADM2FTGHgt3RrSA3uy0s8FBYG1nxbA+rws0Z7egCAgRfuf3rE/BTR2DKRkF3RqnssJMnJM8XAahi8xP7zxQQijSrl0OA9ixJEBbJAxkGo1zeWz44YOFdmqkbOxClqIQcwqKLdxdSNoQt6DP3JkAcfZ+QwPGPp1bGi0H7c9pbWNLdZ/2LksjrgiTlh/L621jq6OfabquWKHFzpa8pQxv6GtQNPxYk/4yGj1io6i+JzfXZVqGWDv6gOKIW6bghC+Cm20088jo3GfaGjbx3LQiQj6dwC4fPJcLnZhrSFMzRrUa8DiUj9ZwJpTBcIPvoGpywDUgDQnRuRLOGqb0sZvppRyTK5c5XkDvr/efIsSj0G6W/FDxLbJaqihp/yTd1Rm4I2St7mH197FAqSQ9B11ZdZQyAGiSz18aTGcMXJNi/koXf2RtnVC9gOVHK6ZyuFxBW+GcAG/XHqbm87Ve+F66UKKaSk3YpYwoXnQStPCbNJXqMdzzaWQCTWw/rJk4r6xSAu3O0RP4lup+sP2xI15eKQ+Hp1NfBmhmk9M+NdbDfTQDb34vln7Matih5WndGq3FeiAu2/c88Tu7uxTBy5Bi7kmd0yslnudEfw79QUh4JccdvuIKMqRqE9gu/Wuv0jO3s9OJ41b/NamWah/PlL35Da2toUU7V2GN0PonXwASLoOTQBTfbGHhMRT9FSgx39qtZ1hEFtdTlRTNEI/2Ac5FLKwRUgZL30Z+RpZf1z4AdbpwxOG+NifHWxN1DvGunpIhPUFfQp7fLKvz892Mxw64U8tu7RgPjqVx/vjyy8zZ0Hdpvq2uP+t13Cty5u3jCCjdT0uwSpb/wh5XBi+bTatrjtNgCr8FtQ9vEPoh4tEpvZiBZ0XhQnC1TwbJJk11Tw+Z1y5aOKAt6wzK3KNZII9E2pJRL85lZb/6RKzM8rFVwWsAXR4HFVZsnIAa7UQ/WfpXb0Rs4gkA29vwPdu6P0/hOKtKVd8iEJDBgCeDhsnlNZmMbg4W48kVMPDTkdP+7CdxJWteFIpDiP6a0bnWNKAF0wczFw+3Yj3QnJannss9g7u5WhavZiOGCkC1IH0dOD/yDQcprtwOKhcCJDEGeSjHAQX7dmkG+dob6DgW28wHD1EpcpZxVSne3lAsk4GrXh9ZbUrNjp+5W7/6van8cFGuzApX9b+kJduAv2u4HcuBaINfD/xuJv4QmiZlfihefrFAzP07kQPTq+bPxiRLV00hhY0UK3YMnCRv1py/m8j6Bp3tD/V9i6tFkPqmEQYiq+76Q/anCe4NtiJN6w++TH8L5V40NHoKNTweG54dwxGIe7SGeZ09YNkQhKv42D7CxCkcpHSIZSqin+HTu/EMM3Zsh6Yl56EiCdC8jxg624cG1whjwBrAruDZdeU6G0r1HQ1Az7iCuVPW2HbVr+ybTWVK5JQtRuKQb11ZvtznKqaGypAqpE4IRCbCQ6GEMSp56z77VmTYqWFT/IaD7f57xlG4s1t7NjawxnD4NoYQmaWLIGxhl2SSCqRP7qznem98
zB1aV9QtM+fA7R18QtUel8+8v/I0ViPBm1B3flYuDsEmKLc9V1TBmSyZ2/VOmeik026gOdNpkR+fAGsLnCcHY744PJkG3uiidDBTwJPVYxMqh2dF0m0oyKI0JNuXpwymhnEsFdWJBph2eFo3oiiI/RCph2Q+e5e2+QMd0yyomByWCT5T41Ay1Osisr3DFlh436SYD10c000VE7Rx6WK+3Fldddxee33R5oOw6Hu4gdAKSjo/RyzO1MQe/5fdqYgZmfGhcBVjk89oAcLFWKGWpk+AK+k7qxbDwRScuFkelGN6vO8TC8YsNutIZpOoyBAkvgkkCEL2Vc97xmwDfin5J+hJjYOzBBZwEK8uvwUNHkTM4/uWnXpeuHJWAR3Tthja30v0752hx2udM42S8KqWtzzKDoRyZlXEqPoOpHkOGWRadNdHM2Y3Poj8dEUxLPtucC9Ce6Kn5kXA8owvUkwxaGU6W4y3xSe+C9A1LZtQtbOt/WkAC6WJaM9dxvqZGBhMuM1J1pKxK7hzAICfrO/mPzmDPGDDDSNf2jiF2uiixl896rJEnc6Ht0aRN3QZcfTtxJM4F/gGhZIipFQRio9YIGKK0cGW+7SlTkNzZ+1kEnBLbNQ0H3i4xTkHtIVlg2MklOuZ1hbNY/qj91rueQsNrLfgxi8OB6pQ5BaJh0dwRwcry+QMUghXk/pP8Rcee68l0wJsII6qqdjQhrra14H7tzGrREi0kipJLtQsaIx9Jtlk14Hk/7eStM+rGKxAz4+n8YyEOQh3CHPUgNDOGXjeHdl+JuVkAihXhC/Ate1bTswLtCvXtOunQe+hacjFA3nX1qvX+Shd7wCLeKuGqpKrGjDhMFgq5ZlsumPgmdpBuNelVHDC4k+hX5+lPIs24vgt3enZxpRUxUiwfw3RPTtryQlQjCxHO9MA1eLvo5m1I6AmX8Mp7Mn/RPE0r64CPWsx1P+Xkx20RUjWefnPFPS/K0AB1lkaIi+6csaWLA25ZejPLL4kojqlRbN6KVYxML7umLJZxoWj2TzW2WonAaa2m/OFd5kheRS9xLcoAqi9h/4zh+7pFLZaNhz8t/iLgYTAKkVGt2x2MKe9Wz6oLh5zLxkuCDUX9fHMOuB7uEUgba9aqKP9Z3FmOVkjiMgv1qtjbjjJMu9E5+chdpva3oDWLV/lfKDq6tqppQXVYwVXQ6POBv5YKGoQSCHc/w35LuQOkBucHSl3v1bjm0KSW5Eg4gIe9kfs9eaVQ+kROPfOyCVS6DSrYVB7t20QgztcooNkMEk7q0oP37XBsfgdUFDzqhckckWx0numJfCZmQANsaXS1o5dhpL++OjTI3F8+DijVBesuZKW+Uslu6yysllv5ddjT+vfSmu3sXNaUUFbFkJXyNmdcMC1kgUCHU9IIxlrACk2TNcM2mxQOoQ5/jR+rgyu6epBA0Sjk2Kq4UI8e+z1R0v/7UG7m209gOsIMvGWnOazW2bcJXt3DXj/xnHzZXCAMbwfFVQUP1utyAU/NN3QskAmD+Q51zAtraztsT090kjgiAOwra7buyQ88x2wkS6t/OI3HxwXVsm5/YyI57eJeMhIHtVHp2/dYmHuSXpkBQDwq4a0/B4lnUjKzRc8jsrUmQ+YpvT9ofh6xBUwaiH6HIb6ojKCM4t1vc4BQxjkwwuCYPad2gLQGmJnkalg3njeN0jaGcP01Naxvtpi5fQi6tcakqyT06Ccsc8j4PRAPmjG7KI4Yz8loHlNyHb5/pgsbgWsxfXLXd/J21JRVsWN8PBkeXu68noT0Pg8ja6TTGqFxjmW4htaZd4GuVDFsB4t7nAGzFLigD4ZA0ZLUgJ+Bfygc63RrqmI/L2OUyEFJzxzjqGkk43mI/Z0eO7zfG6OZa+SYdIK++If+uJCaDZLmwCm3sRY3cFxtyJf5H7iXCqavYb+57PqGPQeAqc5tYwH+KM0wq5yTLzFikdIJCq2FMEtqmpgIa4C5OZii4BmsM/dYhNRxvQobSX3
ajHGtopkA7IDQ7LJBLuuj+fMYoL3cMBOM4Hu56A3S5p9rIQV0yQCXDDt6QdHPbH0VgNlJisteaZ/fgTfLi57ZT0NbEDQKy0GPkhtUSbvlkJ+x0nCaDvfBe2rPtd1xiunIryIg6F/LACW8jDc64T7QSyJQxbsDDvuWOYKRmVWuUOCkO8Yh2Hi9HqPdiRToPT+7b8UrEjuP+kGNP27W6YMDUHEId/6UmQ23rWLxDAcjp3M9tR4LiNxYIHOp4A4cj+1wS9dz7IaEgB4c9/toGUUySuDE07xcDrTKsrNhVOyAuYOEzwkb/xqumeSrMfeSM8QZPG/C2YqMerFGecBGfrL05Ks45UITJjH75Y8dxMNK2G7i4mqhlj4ZwmQYRqvBtBFO+uEStFbLBV2BKNA/jFF13g+l3LgbS302Cu7q6QGZJ6zD4RLuVQc38xuvKYpEiYssEpoxY2sdGslQ/pzU56Rdmr+Ft7huoFd8Sb6nqyunSfUSP/AGHDT+EnVZUIliWJe/tLxhJCpUy1DEHB2ZcV7BuD1ZzkHy4+WyNWKUGLj2l4oRQjNpL+VeZhuKHRvsfxpF+7IUQc40JwEg8+8ZgAvfhfLSVHPGlVxbCaVN90eOFdEIQBaOfl+u4gKP0L/9iJEeh+kYctv7FsUkv9kbCAY3BRPK+tey/FtnTgoPEpfc2hfKgelP2ZWKkmcxx97bHDWrzAwWRcS+SKJY7HHKT3HgaO7TdubX+hb3LuQj+B7xKmTJzEKXGPJ+xIN1GJ0ogCvfgPsjr1hZ5HZqp5NXCMuLDPws8HCFWHVn6pKqmhRyZ3G2nMHjkmhMgQSC7tjGB7TWWnZazzgvK1XvdQVUGF13rvjD55A5jlYkl2uTeqr7LCQQNvcHVQvwYokWeOfAQNmRzaMXSugBkPFQ1QPTpa4F8vYuRCHOjNayYKNYYz5yQ0KRl3Cx3R3OL98NVaQlXzpGMhc4rHLWUmJLjMPs1udie+/huuEh5r4ZfZtieTjcPq6flTM3ua6GJoQ3Nc2/r4AAN7TYolIcThUJCqstCcxvn8klKtwbOLYOSMDydFNQxSCEOS8FoB4Bdmxax4uly1BOQW/ntTqnQVEILBuoXtxsnvjiV2TYz3HzrIzDK36vIB8RptWwW3Z7kUcstzMrucyjjHpcAUCi7mUW4xcRGK2wyMg9fXyVVo5zvqGHAWQO6EjWkOutr3cJnLtq5T1HSjjCI7xJTPNm6yeDhUlAH4dD9EhVxq3f/vrBWCVNyPKESjitKe47JPbxFhTst1GR9+ggDJqtaL4xeg8GcJoOtk3KdKB6uQ4CU3WAaxyVIorp9mLPEVmaSAHm4aAwffiA+egtjbzWzWMrzzs0ZLNt2wzL2ySLxmdbWlaAt+SQWXC0xliTeSP0YcTtV8MBltoyJ4vM/emJIHuC5B3XQiBEs6a1qsGxVkMTAFRD2MEdr/dG7RmdpB9V/xW+g5ke8F6ScpIVTyup2NFDqb2mUfJ6/SA2ie373dreVvFFbthSNtwUTdZVgUAS02SfSbWZQplVHDEeREH6+2YoGKfE1w+RZDWMWFPxwt6Ygm17Lt8NqzZZO6xYT5fcYqVqye7WdiOyD44tL7bT0u2Q1mBEUTPCHIG5hK+7KyhBbtp9nrp1E6S8zSHtGTLfNYLrTgFEmPOYGgDsRvLj1lMq8mWyqRKQ+avKi1eozmjTOAalGWCj5eA/5vG84UhyldG84HUr40gLlW/6XEyKebilO35r2YwdALjUT+c2H9VRvwUC319j0KdGJbBjP5wt5jmTEWqYjV1/Z1Pbo1OL5XyOfLKGw8nV0LPay5TOAL5gTRgIBX1E8hmf53PzCVZluvs6ON8WAm8x2M4nrCAJ3sw2+v81D16T2+evb6sqRNrVBJKHRY99YjS3GhMNdn08uIcLLVc3YiZhLTgQUuaoOyxOVtKxlZDOE+Tx7VVRl8SWZ7KdkdZH5o+i6FFF3LCc78Pox2RiDF+PvHY5N4uj4MGrq+uxEsO
/GQkn7Np1I/HF6tcKOdoOFfgOhDjB9/eWEczposYjSntEPSMWwa2EkESAtHGPZ5QHj9PF0Cd/6xRvIvWfWhaaEIbIHCaM4BweMUebzvsN55MnlNGNdPMtn5uIXorycD4yX2AMimYJktd09SWRjqwhOXd7hb/uPOqbTBX8n9IfXnvy13nmd9wwRsGTL//zx8TqN3W/e3Ahrmc0Tbs6ZKszXPcMRBNiYROYatVi4TfDl27SRymoM5kTb5f1qLxpAocZKvoxzMaF89YXuF5C6Ry4IMsW1SdCCTP+VKODpRNp4fsFYuRr9RH87Vj2uUj/BTxbm9Q2ms0mkgc0PLmDvNPyjK2wNDwiQ7qoT/vVgzmPi9TOfbvdAUP7gNa4DXwLXlOVAShaf6U/not8+5SdzZDajEILVeIqugs4WsCuBP6Anl/znWp66YjwuSi2VNeyvx2ByUHDmXULJKpUeVKWZ/BXgtyn7dEYHZB4F7e+TBffJNIHv83Sd2odmH1OhkhM+1JCcJjpmBeDRmMU2usU7/SJWDUGkxDCu+88STWckOg85PG6r5X0hsXYWbVeNZyuyIjTyGIguDcnD8OashDDkr8DQX0dp5BILuIP5SWDra86CClV9kdkAG/TDX9lEHogQw7Jq39IsGc77RQlKxdaHFiGcQJbrEJmtnL6uL46QQTxJTKddPRSPUQMUUCgxKwIg2AEdVzGCOouN2+YvZIeivfuxcNQLbwLJyIomyWz9a4l6YM7m1so9CnEB5Wu00k5gGvjq+z8hxZgbVbxQ1T93rbpBf3Cz9sm9QVlfH5GhHfz8Sxl4Nhg1R5ucSg0kSMnR0XAVe3M5SsKv6KtUhwcHIdFGDhEdRr7Igq+zrZtPNtYoNjTL9p2vuP4O3GuQt9/x9Z6Agr2eoFiFXJSqCfVRfsvcbTQhmp0FVNSBnHHlNvZmhkt3MMLq9w0MYptaLGmZUegXnF7uxsKBgQ/FONgAQFhvn9RIyDp6zSBaUPDu7eXRdOU2w8OmY12loKn2ObLZdcYD2f1hXbdaWUuRl7rCKYgoapdHabiBP6+HtH2iFl/Fptqixl1my11BuPN7F7Ocgdbg8LIy9b/6OMe7Kam5dAhmfy9GKAJbRPAe6hYxFFPnwYMqsGOtQX4EoJcYI/HDcBg16MtC9lzrRspHuLe8unYTX7IhPH95y+B28QJWOS7imhKHTyVH2Chk919ktOHYSVYTuBMRT93ZJqwugp5NifI2YRgRNtn8CgNIwtofJL6JDTfY3KSSRWmGM8oX0P/vRo/rG4U6XSKY9jcrA1ohBx/k4lVpPqAbsPhUZu7MOWIK3n2ptgL4JTLyRPn3BS86tvpRRhqpAmhUWYyOzAvrtzb8mDvuTdDTy4OzSYrqYqOrI9j6aElBLAFvO/N+Qsi0uDTucO0VHig9wgG+lUE0iJQgkzKyZAuwlw6rGuFfNzlY6KYOcxxmn9oDLZjDQaf1+yA4bTIVp8S4Kkw12qXiVpvdH6AcGqKvASDPJ6+ayGWPig1IOt2hbR9dS2AaVyBRTZ2cxJYc0QrHQvPM+uN1b4Tz3AnEb1NI118f1clRBgLcgYgxcphF04KEfRbcx7LbF/y1sgMRHRYaHODdBYZ7IMkIdvoypWmaAXDCX3AuYF7aO3+W8BA0ThUshDJ7HOLywrgzkxkeyiJ9IYWSLBYKnIBE58ciw4iSYKvYGL6syesCcNh6BQo+VpWRaEpdSkDml+DvgNPgttr4svD3LMVsVeHvqPrCy1znc43ZHkX/84UwA1NDd/uLxw3UGr7Qiv2laboyc89D6RQ62U5AVaRso2pcBl0SVKVtdGa2h1eSaByDfYm6sWEZkluTfJfEn8RErPcQ9lM+aPDikq34k6frFaDY5xS0tk1nkOdeqrN2XUsHP4pEw+HKd0TRt24XpgTqKa9HjgVhSu+lyoyyWnXyO04Td6zUK5cUbcb+/jSR9cy/8egSRSsmKow4Ec89KNAmnN7HljkCGXCdhpPziw
KIfup+a6qRu1EYGmEsjXqKHBC4gVgIO0XcfVHxtm/Kb3egbHVe8xbS6+blhZwT03nhbY0kUWrC2fQDOBfrdLL5NdQOh7rwDE2B2Z5pwwe3lb7M+cFNx57QXl4cpDJTYjJDUxew1Ne7KIk7QYIt9EfUwsG6yfdUkwZq90GT8pvQ0Ga6bmgA/cAYyNme1WVpGRaFL5ZPSGgyZ6YaFmyP7P8/zCJHgjWusgu6IwX6ad5gw4VT3GU9FoIiBFMgwV18egvI9x803oPepIaAnPjcteWOzVcKsWk+4H5bpLMrPfynaWWhcmNEsO7UfPJtU+jNuoZFya293o/iybdLXs9QOWHhoce5IOI12G5NbBFK8yHVxl+gcs5E3mcTGC7W3C2uqXryS784fkrvaaJtYMVPS5bZz0FA8LlXUBUYC6DqQt6SyMvK/XJMYSDVwepoCZuvxKvAii9IH0XC1PveYPpdPZxg3mkMpitmKZibVN3ApvKNT4fiHjF/4Ex1k1rCcWq+h3g0lFBH7HLwyx6CyptV2WlmulrNSL1Q85q4aht3K8GfyOhdGR43kt8segSI+xofrBmUsIcqS6uC3892PtcEfqFnI9X4EwjV0VChuOHduxdwLmORVWGJHGK3l5qHOvh93caHufhBdtr0JrNH5njtN3xIWump+5HgavQr4ElZR9DdoXwNJkqpsZm2l7gBKHSyDyfwisvprBgDRmrHmfuGYlwoXdvemTFbjbPgsLXLVqCD+lDJbngCsmMymn+xD2nF7U0R84/numzTU1OWVQ0cM+ANrPkaGFsBDxAu/CJOJ/G/j0FhlOltax/IWA2Flq4zQX9aFZBEcxZhUzpas1m7YkHRVhnvZ1IQPGk3WnFHJsIIGu8dhTJYkMUNuh0DQvTxprPaHvPmZCMylyTIlO2NLyP1ezHIxE1ZsNVwq7QPIzc9vVyaz8XD4+hoJ3hYidUhA9GCW88rbRooO0kT+0fNQob9X+SuWX/T8714Zxzf4j5FvxUh7A8JhEWLmaK1OxD6um/fsQCkhV5Q+1xhhlkXj7F/WZYTFy4w2VL6BoJ8o3fFp5WNy/Sp1vW1ozMV1wWNndwoJsau8tzF78XkYrgBlkKJorxzvlQJousWBH6yzTyNTfgLiA+hutIhsGC3s2vxA8BwrpizrKTK3ytDKGXVn8iUuqYXogrwrpznRg5XO76nbGI+gIK645imxrXZpi+LaRm0zJWIU2UkCAU5MuPz/OlnfFUVg53iPwNuFBJdwC8hAOLe+t6RaiWQCDgtadLbJdA77X+IhPpZU3Gvz+BX2LRWQJRXCAfHZI7TIDZdbFXRvdWa4f7ddEYlpPCmkEoPoQgnTPYRiZYwu6x2mT3836jNN1ru9I57hzjsn+4+0gZlgGhi9XypOaS4v1Ly2TuowxfCjC3MH9xGwkApTIMoc3M6OQdmW2JD7dbeTpBOVRKmzKqiCpmycp9J8y1NKmuSGDfqCYpLkY2fJg9zUBzVfaAINfG/Di2xc5XNalm7/H6dD0ou96XF5lbkdctEx/BwOn2kfT4sZYNM9bsde/34BBPeebO81nxc4XNhArYVkkE+qxc7EL75oqn5HMef7GkRYJBAI4caRObFF4f0erwAWfvo0HS2CJmqJ/O+3T7F4LGUAe9847YuEcXg1BcH3HOGrnGHfp/3A3NJzVaJmTsjdW+KFc9FgGvmG31lSD3G+io4BeY32UZ+JBFuI7QmNQZQ8fax9Soo1h2nq2B4dzyMpOHNSNyW0T0dQ5MBjD+Htp6djBjp72uDpronYutzwo0cE+hl45bktceexd9eOp1JBbQDDVTSci/YjalMcqnQ9M4gpEVmvRdfx8v9erz7MKXTFNXthKaQRV/jWe/i0957DLKJrxY7e2DpLIlEVcOld2HDyurDbGi363skPiZKteLj7rJcZWpvO12AcG84IsLyCgFgeh0/1aHiM4OSvltyH2U8ATU6z313WI0KCe2abXn+ZdXbO2tM1g1AkJ6LAn
DYaPClVOaabxDe5jZFm7YXgULB6FxfoC7GQcBa/DMPOnMMJxY/Ku+kt7gjH32T9zjIYkm5M5FoG2yFw2Qre21juDz8106bK3VWhAQVMoxB1xAWxZXWiJgSvYwKtDwllJSStaS+TgJVSp2IV9mixd47yJRucSBRbSxrUrMkv0xzpbReSpOvky/t2OAo1EWKA/M99dblLJHN769eeuzXKiMYaEBAObgdzp89daBe4hmv1dnv62jXPAkKQs5ZMWTeWqYZIkMA9ICytmvbnAi+umzSqEvaaras+xcoH7YCFqntRRYQdkvs1m26mBFaJoziZbK2/SB4itvH1SXTYI8AuG9xiiSanPG4F/k1ph01bW+gJxEtJ62nyVfqge92w93suCGupxAHiis5M7W+MfvzmWyDY5Edsy77WGsVqhmfVrxJiEJh4nP2iyCAQSfRKAow4DM/4ZiDlSJJxioRj9+s7sT8pX5BVcJZDS+Nitx9k+NCIvoQAuXV3SU9f80TmKX/z6vuYdzw+m6AD7Ml8wkT5NZ2hsbbKlFLcOiQSMY6x8+FJ1qzdPt0P1RlnwM2nIohSJEw88JOyeDabxLDAc4FRd1hLcJRFeIbo1/9vRlZeLRNBCU2mkp3/c6Mc7zvPIeG04V0vXZwFCbYnX5w7lZdo7sD4RkjgTxk9hZ3CHXlBrMI//W6u7C0XMXajlHMmG3jSzT26aQ85UUPwnvUnneYJ6pjQCzd9vZ6KGyO6Up4D8P/2n2DZenMedUpRJZShcHWzZg8/v7jWWbpRpZIc7wcESOgETq22T8MZ1yopCc03DDP3T+kDjH8hzOqkdMKnpzq1nsJXtORTpvKNz1ngJ59O9ubiKGnrDh6bTB/UkIYeF12st8BKXM4Nb5qIRe3j49qG2JCIpIY3+b6Mgv57WcceFwgELhMyjSc3BClA4xqqZZ2iazUJRiA34VCvoa7DXyWa4Lf0WWiWdWtO9tDKXu64bD72rmLXNAUr78IMZvJVD/BvdXVPKEKCKbMN30ePj4kL6TU73Q39ynvebXa+06vAoRjiOSdIsw7NTL1l4pVGef3Tb6M7yr3rC6mJtIVtghXK8F3NPJ51YU3FdpBVNoqCsn4VqPMvckUxGdHhwKJB+bhjsR9PoC7YHMTqv7pEy0HtuPLMvv3PD7I1vRWlxZgZaFztMZvS5ve0QAYrqgG8asrNu3Lkf6l/buYrpPtylEOtE+GUezMKrbnk/r/GodOqFyUxrfpCRZ1JXq74b9RIVFDgA9g1wu1wLsGvKVpxB+FDKl3zgrSh3YX40sz8Sl4fsXrBC+F4uLozk6yKile8eK+i4570X8wyvHXOvUGElfZq/9MMGjqZDAUFoN1AIAcMMWLHhzcLnfVvSuzt86qh/9exlM2k4CYYgnM/y4FHT0HLbXzq6NDBLSqau3IdgMNzBLpBBH3lIPhLzVcyFaDGnzzQw+OnVPQ+Bb7AYg11QibDUlsfpQi932GR2o5wtUmWjJheKquP1qKTJHc3BuaOTnxXUwQYe8zOI92WTVf4xwPLMh0SdC8O1CpTMmzaVqGxyDPzrc3H9xHaZ9co3lZJM3gKvQ2/u+hD7+EJzGNTJmuY73g+BVCu04IgAdUZOw9djPkhrKXTpmIv2kPN6Amb0Q1peGPcXtob1DuJrWUNSZySJ+8es8uFCUN6xkVFJZJbtQjEMoag0P4o3bqCgWYP3CZWqAeaYmwwZR+CePHvFseYm75+iC8UJUOUJ4RO9Ay9zWg13l4srKACqRpoUI+T8QOiy5TEJgsfozjYbTXkD3EHFzHHDqIxNclchD96yfDvpjuN68ubbrOxCBzcG2gAbiNIUHOnSQHOi/IsGP3MNLqNFKahQHwSAcfgFN3wrSTQGkkLrMP9p60kIYHiRrKdi37hNwkdgWLVLtYvyRdy27BepIFo240U65qLzzGe00NhNRSAIM09tNFnnTKPC1sCWpHsJYW0IGTC4E5kwonyybCwV5CEHDZMMZN/v5sXP
nHki0CcvikSLyFrQB/nqQRsLC/ROcafAP1NvHIcs7MrDZ0Vim459rrt9cuzP6ohFOArlHEc2fnbfBTxUNS+aqiY/FkptaMJjpALT4RNgeO6CWSEnycAKXGBAUcTYBmdYu/dO7xMBNiKvUH7d46lnpulD7pXNMB2WEW9PwIKfjPnEvJE+KK8cE1jRZTDKQ5z7zi7BqSMX3S78N5fKyOcR3+BKcnVjaeDPCBy7XWIZa6cMlbeJsD6aAD8oy/2GHe+KwqoKa2adTIYJH4+rzS+K+xDpDBjbRw8ooDmaiMyw1LtGxM2IaT5KUElyot19G99SjjzzwSR2Bty1T9VqLljInouaz5j0DrLE2HY7znFree8AVGaHkyUM7xuCf5VPmcnfCkTagHm42wMUQ29mq8NOpH+UUe88xhha52cXIFYxprN02N1w5Gfd2O4BYVpyyr1HVdfMPOU7uCl1rbf8nbNTTu6uLlkXxweuozNJ/XxG202/g/beXlsu69oBaS5woLJRS5/Pnstnx2h48ErvxKJTJRNx7Se9/Avz1kUpVEZa7TR8FY71WpSgIrgNtECktli5jpooHKbOiy0uSqNdai1AvMYeGHRGCdXPlNxIG5SwdGmc2Wv2ZYTe4xT3wvkcyz9N28hbA9uHPy/KSext/PzNnSlN5go26UcyzptFRRecX0vX2rCC1EDGCLZNI66BSVJLb+O4Lia5N8XXayoe3z+4WHU74gsiiJBGcjJ8NH3ownPKoXX5SyDa+5cIEc+PWp/rGOi2nxvZig6yzUtIVj+wfFsdBHH4de25u8afF7ikZY328JPXv8Dj07nNufEIXpHp9ixOeiCMYrPmiZXl/EEUf+EUtaYX8IE2Fh7BO1tV4iCIMXhMypSI/TB6NjQ4nj+YBE29MwSBC+VUNGKzRJjNRqhQsnjJu44dKAYN5BGCxgCNFDNrNLk//k3M1pY3EWXyfiwxa68/9Q1iXvBbJo9/28mgx4IZPKd7DfY7qxY4Z8w+HlsCengTWiWlrMgNtz2aooO0Lh5VB4C3bVs/IWn34/HhcXiMkZunoN4VzhQIc+q4mTl/MAPnuGJSh40AwJwkNTSNkMNWB7a6CQAl7YidBhFMFqXomddRB/kGHBm+BYMgyGNcPZC6GguavtzizDjp4o+4+nsUzIEb/mXWXfOrYYNKVlzBFG60RBlvWa9r4RZAr/kYNNdQAigzvZEENxDgkRioFxq8g1C1ddnBTpZSbfWR3/CPAqv5lPwFLN9e91fhkMY23KahiqIly2j5uEy6jnVe1VVnLqLtq/TghwEuQDDtBy8VkkSyMLvacrN6wPg+gIjtlM8qdFCnndBO3b9mZAzxbyiySE4egq+nowQMG0ggsSNvVYMgfd8l1j80cMP6EthKSA5poggIWiFN4hG7+Fi4qKtZwAzeBfCfyaq9C1+NA4IcZcSKBZr19YKW+aeiwxaHYxZaCt1hjZUfGxloSGlB8SE8dy5CGoQW3GDCmymiVNHFEhiTekuBWrTYs9bTANe+LwSw8xx07JgAGIU94tbqoYVljDgn2Q7Eiw3mT70SxofKX80AOUpebFELbudKRvH0a5rl/fAwiwb+fHbRI4x6V2OoMPdBiY0XlQfeqoeqF2mO5iSe1c7d8JKB+/CZqyOyf+0x9bNCz6qvGx5TeaudPynD6YriX3ovPO13VxS3uTanhX/x3bWkcEJ1M+kwZp3WXSpWHcg0ULP0fsfhWs0q+Fqr/v/Y10fF9nHfzFVoxPtXTg2rSjmOZ9446lKaFCF3sxcwwQnBS9T/GfXyHnb/SVqzTwCpVOUpKql5UY75RKAoCFr3+rMWS2FxEXOrWTh32NCVwraKbdZKImxhieYnDx7m1kWHCshLgnuFYHz4rbU3Io594wOR+UWOWn/SnyVHPfKyNjgsgGIPECEv7CcUUc+eBsZj9W5kLvUTfY6Nd5BwKLYPW72GZS9MVSKqXWHxFwnxD0aKHXRbIjJUpcmVRMmHgtQgfJLD
z9KYhRu7wtGxwxGu6UlL40tc3nZpYmecIoXRmyGfop+6fRXNhAbM0lDq7thzQiil7+BT5a8jiLo7f2kUym+HUbStNHVWRpJO4HEkAqStJSV+Bd0gDljBd17G+deNZgYrDkv2oNp+2Tp37wefYgfdQD8Lb90t3LVUXNyzF8YozCYMl8IMfR/BxNSledLri3KNb11iPByxn249tPJWCx5nRkf+deaiT3EmsOhfv5tr0Pr6Eth88ECOp4RMSN2gPv4XZyBeMX+XvfJNhySzyTA9TBciiuZX9QTpx+zunWMFIPyNzDAfzQTu4qmiC+hShPzlC9hTLtdffRgaDGfLpWulcam8Ti/I8QYU4NQNrnIYCGchwwJJwtBNVX+sZwovS3mgMaG0aQAHUfeE++Efh+69zjTCJDmNbBvPrzwz1lnKqDCIhFRJHLAFAjyBLIur80or5wfxWdHAgRAeGSYHTlGVIUvbDASGyX8qO3b/XCU96oIP8nbdzpOcrHJpQGzOwkFPtMkhDgnaiIJFQYzFde8GROkEjSn3SCBOk931dXfansNSuMcnuW1x+gtS7On7fM2nuY1JN1gpb8o5tIhkO+eyB3Hv6JDCw2h3dIN/SGZmgCSIULDj2989UWhpEkTWttTCGV60xja0CpJDI8+vjuAbhMcA+yZiUNS8T84O01IPgYJW+p/j5lRAWbqvxw+riKawqtPOfkVHyCvKQZK5nvRoO4AWUugSY/yM6G/GpT4J369b8n7b2kPYvuTmRq7rUKs2Zn35qNDbVj7XoXMxmQI9QkkT6f0K3Hji5dtCwn9KWgamJyUXU9QuDC+fEN33oGkIKtmKT6AYZI+n0r+1qvLe1+GhE53aWc1t/T2Lo5DtP128H7LGgTVDjGVIvCOJl2ig0wmKHMAE6KTvSTRvhSbTEy8XZgvlBkGVLqpfDx+IFr+l8oh1HsOlU1wGHTB2/rcpE9kqqu8BfUKRYH1zT1YA1OcV7iN4fELfihU7Y3TDZMPskHD91wyHbZfcvG7flVeFsZrfqT4Xv6Ok1HZiFZ1EMqcyl7W+bc2pYCZJd9vxwpdtOx9BjRjRRZemXYoK5HAIAYAolkBOYmwMVwvt0rZXRf5iw9qgm9Xbci3772uJ3qm8xDyxUnak8oMcBsyfcLu/3pV71SgswxkjGe4LQgFA7IO8Ss506OhTaD+tJc1qevRzVLVNe0g7m/V/WmjQQHi7VuNil1aU0tCrmyPj87jWC/cCjosDzAgIoJpB3hCepnyGuAt7vEUWG5SXh+Q0QzLuJRzagsNbai3L4PLmJvzqhIHHEMb2j7wXO3y0F3uBlcDxZbSvcz8uXP/uK+Ad3Bll2wHolzy6ZVdRkCUukMaL+K0e91XcBEZ7fhra96c7SxXrb0WDAObxj7l75r4giupIzumWiTha52Ei3HUHPxQgN3y+L+b26RKKSz4AUlUK+eVeYmkX19H5kCgukyaflIpHGPLuZvWWjyt8UKhDdELYBm56cU/zuVdV7sH5mA1wGZnopm2VOk6YSCJA9tA2150XDeYDeNclVfyoVZ4DUO41TUiF2Leh2apAJc9zmo/ynDXck/O28HmjH1XB5ZonuV/y4QXXL5eqdnKqBm494quk7PY9T75+jBmgd5TnadPODRKoRJbHzmQ/8R4P3j98Hp943Kzm0QKDjUeM9q3OjzJgt+CheqtvIOop5p528CMughy8fcu52kotUMdzuC6EWa0Bm30piWekEFmLimsSq/7gFiyUOSCA8Q9PV+HosWeYTjcx660zHEUbu22KSql4WYlOKXrkPKySvEx0v8nOAlJY5L5jvkY8igUoihpIcTATdsCuVcm1WAn2OuimTk5pCTxIjzhHrlYC4+Pb0Nm+hntnS5ZNY/n4kt/l7/z7aB5wz6vry0/sMDxqNajxG4Ux+rNwbCBUOQd2lUJMcHGkuPfwNYzNXYYIqJwR32LBSZ4NUX3RhctcryzB/2uUOXec9sm3DMaaVd5H2rGXWfiZ
Ah9hCxdsI23pMJyLklulUy9TgbKBsOFnPxSExYvqv/TfbELaiY8on8qtvHC5lmUit1MnUXKlIG5bI0oeoBm4sqzUdAoNW8MfKR+px4lfzsPgXbO9DzbUTneBvz53n+qvagOKvQkhHbuGpnhR67CeMi263RMX6cR2UaOxXzpc46MqQMsyMb+M8ug223cCwr6tIa0O+ks+9xT2o4MzlcLwTLrwmCyHYOm0wmAc/qioZr2Yr8Eyr1WfJsXWjR7VyN55vEhJyhcnmyKHPSdgirhzm4X1aWkYITvtEFvelwd/UEOVR1SIJCd0GNLEf8dPyGqMoklkJ4yIw1MvH8eEOV04t7x5Zq6jHy/HLl8QsmU1tCaK01WDHXg/eycvBuL/ToQwNzhMnhLumqLpeTOhVlCpFk3RSwmsxQiu3Z98Vt5fP/BaOI6ousUxlqGHIsM7TaAoHIlKSOQH6Lx962D4sd0UAFj39PKLjYhId2WI2LAUJexw28uV5L+SKoN+DsFXJzEX+sx1epZ6qx++1yorI6ayS0OcOCYAMzQ9zJCRbn/Ai7F7fmO42Hd8YUPYg5tvpANhJumGVvLLmwxn3jKk7+8+KgckeSChzplWLE8H0V266nZD//fWoYlOhvJajSnXwYVrTkW8ycrRpn8Ifa3PychsMItBYIUJFg1dpLshZyULQnZjl6UO5qZn9YJjVyViXqj3aiUmTXddlxyyB/DmHs5YWYJSd1AXsZVLNOlH9NDGwKiBHvlpkLcLPiUWIj3Rqmfx0Bb9Wz3hYhiRby7vvC+Pmx8xHoFI1Z46dpIlkfeMHFYBMfOcebTdVDyC9W4NGjAz7mxwoLOPLoS2fb9Lrx5GhUSjaC4X9rgdGJeRhGr7Vz7ePQp4Z1IPbRNSPRylAHrCt7xWLbq8BSi94588IGPKjqnhsz3JQdy31aJSVlhNcAdoiRAygZ47rouEhDpHCFwlOHxpWrJclFIpmoDyDeuoOYpSom0dqxJhcFJi/udohOvdpNJiEj+Zs54c3y7sn6BO/5b/CRuPSDWz9VbR//sZy2z6XuZCMHCMd0GbimBwEjLSG3qAPFi+u5tKoqLT5wd8M0AGox2q6Z0MKP+Co5Am8Nc7ylKAuBkmDS2R/Utq6aRSbLjHI+Psbz2AVvhp9ZjNqM63Umzfm/RHIf+ZzquXNCnRhkWy7XHR8hy3I51EIP9NGj/pzr2nrz1tVMeNBOaED2cabiH0Q2yV+D/3mM0Igjc4R21XRA+Nqu1FzB0A59TlN7TJM7akl6F1xVRG9gMv0JhDO7QkFJn3oDEaMbMaiPvmTHchA7dT6fpxCHHg3++ZcgBc+S6JRO+CdYGpj+6QDTi+h3vhPjYAZEqxNOfBEQUJjG69Z+2/+IXO3wzvjFA5bmn/53sa7PukK+KLubG82XflC7I0hYaCbFOCDXPC0WPxflXP238Ej5JYYV35MenejbaKRdeDz47PKPmU9NuTITVxGgS8FATI9hLXLBPtjBiF4FLVGfWMmNU3MJxF2dEDt9wLUO5JJIsf6/9DmeU80wYk4PD078yexYnVMOi6Ig5oEZ2bUkPYg+X6bUDzj5TVZEMHKpz8c3mCTYxgnwrUxSv9Xz4QncpOdvwYm2k3rNhKQPCmv/CjLG1LPhwY4yABFRHOPYcanQr2Jv7pphU50S3ew7tWfDXKI0ZAe59eEW+FsT+pcnLDpNGqG4A48askhPa40ZcQzrar95IfHwCym8eMRWW+FWbMy2Inu60Kfl4Kr2twq7N7vZYBqItIFwfAzhz+T2HKqCTAH+QBi6kwnUgiU1jkp+nMfg0KvDh099ug7MFjxVf53cuFokhgCNl7HkJbJKCMt3a4iiHftcdxQYqXqUNbSo2PRG5iuRskWka4pc+UG5sIWOrCVBXDXy0BT5FvmB66JRKnGp8ixdQHS94RVADQL372NsBWT4vv2O/BowpzQcgu208QykGFR5zI4NWvmKvrW6jvj5cWLoTMHuRiaPJsiVOTFV/O
V0ZXo7SqOtG36xmMAW1oP9JJhVmxlGJI8hDnMKwqZmifGfChNovGkSyUmxbBa1BqudSnWjNes3tz4ktFjXI7LL6v5c/FpHIhnTNy19Gb4wkTw4/bZPjmHvir4T3xrcqn18P/qzQyEeVQePhpaUPxdx3ZLmhko7nuq5/fGYY96//oq4ictZjMGNmm8Bo83kUxxoc0QKS8Vpg0zeKhvArDI5JNo7/1sLd5NeZW8wDA7KTmsA1UaiEHsGFXl4M6unB+r0e1FHszLr7UFTH4tgFlOCxw57o64ftG5cySzckHqKNHVoHNQPm3/1/n23+sXqUo0FyJVRbMCwD+ak0YNdI7t3Jibcf60B9KVMAIL94Qqi7K7zn4Qy4gr6QHEKPddmM1dnRKIKpcGUKD4BGKcvQmL4AdEEuOXkbw1uTKUkGuWzbPWCNJje3GcGqYauvjgQh06OYPM5KMrZtLTvX9AdxQeddS/1L+KqDsyeHT0aGXzqqufyqNoptSmHVvw268l1UxUaQd5ujYz3FRRcqUvCs2a/aLkprLGiDnyU15mVNMv82q2VEtsZIby5BLhry9Rx2gf/CR6ma9hFdbpVKMHHwNeWNoL1Os/lRMcSV1NlmdJlNUvoe7XDs8+2hFu2/bOghBG+Y6x0hBwqMazKT82ij22diS0e8IObTKVxemk1YxlV4lTjgMx9uHlqQhIH0DBBx0ljCi6bJnNbXvyvMUCwhK9geNFybsw8o2hwum3vBRrdKgWO9Lb6/9RXC5o/jy1E22uPP4JbEnv9BwLt4pnRqUlOosKZo5oLOfU3am1/9R5CcEnn8w4HO67qtMuuGAc4ZY/NY+6z2ghpBFNAXpj3RjgUCinFhTvqb3wl6Qa2db0GFomGfTYbz9+OOWvfLX4ClBaJ07PrtpkGeEIhfSmRbmmo1XXLoedw4/16oVbB7k2w2vv+f2LQXpyjB9NIy3t3FAlbkSBYY6+lJl1H1RucepZygDfXeypsgti43t1jB302LdExc00VvXVw6Pdrndwc4k2FovtxyD195DhOHHtfegMDc78kfEai/I4shk0z1MevqR+AhSwcbDtQv5DDYjQJa876dd0NW/iRo1pe7HbIL4V10Bt76RLYfm7XOXvJFDe4tMOGeO6ANh4DjlRdpu4d5oKXgsWduvWoEI7FG7yHnuh5AYmeshuJTEM9ihLeKNMvVLJV4Ic7rBbaI6epiddy9Q4doyBslSRcqbwGR7FIkEJAnDSXxphQ5AJNLvBJb4R6Gu4qV6bUWo0j4PoqbJc9thlj9Pi5pxZQoAcREg0UJ9PfH+QbR1pCKrLmj8eF5Q6t8nNuwgCiYp0p1yY6pFXyvKc4vG+PvZ6SE7aHwNtCG/gsSHlmzuQk2E4/0/20IeJNsgOlGnb4LAZiVuFQkbVwZrrHAGSsZGwPiveYuUgPtxHajtFSKbQhSC+4eT84mOAjf5S5XdRJzMbslIQcV+O+8SU65AVpdqhGQUW0kDT9/XGGPdqSn8acT7e28+n+vpdr7ihdjYT9nY6HjnKcb7OdMhQ9B1s5Ke5L2/H9qP8twMkYbDGlpF1UlAmekQchUyukpxoqBs8HaAwz5PWDAiLYR1nA+UxaQ/EAPxK+KvFU3y2tQn2ml43f5sAoOgeuS3KqSJvruOoJnUGaVMbmM9q6EI4984FGp0hFcAQsodhnPLwa2uWl+H0RLE/zKMgSMoAetLsH+mbiPXeuiThDstwcnCFvjnc5YxirZswuzqptDbvJCUR32ykRgm4IsCplQqCZpAJm3wMcc9S8XMsQgGNRbKWMHMAZjqk3twBiKvivUGStILGcOKm+2T8egs4M2lfMXtzy1/jNJDYnWr2PmFRUqLwKc6ba1cB/UkysLRhCHbF/ptMVQ1G5zgBHmCU/d8tDNazhHQCBVGzS8yGUB2SwJb4JJ2OZ40R6uTg3/lMmqItpfusNTF/Q1S4bygt2xKmKWwt6NrrpDYoUZaIRsC7kEkI0ccNojV9yPFuh/a
97adG13SRkw2w0KTgy16FYhM/M9E5gjxXaiLEg+oSrv1Y5ftbh/mGS4hEY5ceyLEUFflp70X3OYBghxwH4+8wFS25z/5rFKTb0s80SjPeENYj8Duy+WNy7OTENssPSwzGLOf5kaR/S25Ri/WYiwO4Zh8RkAv2qFV6/TxXcgKTOKmZX+k7owilFbDNj8o9NJGKgwF3xFIsRGSehzaEiPdPoJ6hK92oBDbFHEBGYjqJxInPJjM4Kb0AKwtMddDPKnJm5CRF4yTXjPTJu28wY3E9wO8FdNpqYV/Yzj4UREr73dnKGA6V0BBymjdLB7kYiwJwPv/Q8Fyff2Arlbnlvp1ohrGwYv/6CPyOqzzakf8UxLFDHgdSOntYBBVEHbkjE+oHomczqug8VrTE7+8nYW0zICAXLpwJQg0pXjZQ2WLSNIzK001am4gjXY7QHERJ1mcaRfR5uEc/L13pa2Z1z3Dq8jL9m7uDlQztyw8wWboLu7qlJQH0hitCqG4S/DcmJxblQPDQHFX7XY4sV1kv7nsuJhkSBeqdFo6XQ2EtjgG0YviW1BCn0TzGBbCpvVz7oAWpWiselTKoK6cbSaJr73n7nP79ehrxSVcEyjnBDThfaSB1W82f/B1EjOR772HX7BabDBVWyOgdjPbSlARPzkPd+uTbdWaG9GmAxZrXivaBpK0kRPfly6CKMt3/fw4ckTCg9WxQaoOABfMVk1TB1V3b7p4FTi8mcGuCKVStBAsERY4mp8PU/EZ04p/jhs4poBFJ4eZXu9919VB4XzH/znNFJwwiQMDD1u+HtnD61aCAoiz0Kaiejo7b/yy/AGdOvf5V333xo4h/Q0nHm4Xyz8w0YlXnImBpHvHwKF6tMNqBEsvpUgjRZYp/5f/+RzDuKgCWYMaKykff1nrWI/UQnq8wzrFVWp5OTxHJcRKJPNIPwa7deeGqtrin+T7BkZGit2bIAT1UqyHDNu5LTJXmL211NQkmVXsLMCdQ3YpeSnmSWmXdvjROBZjITJJyGbwnaln93I6TUMUcMvWeqZ7qtgJ5s+5dyADjMuWXOwOsUJ3vEkC9RZ5mAFL4reumovNsAjBCY4QfJtCMSjw20TWsNeBHKyGSZDMnIjhhNKkS/W4ERA5Lt1iRjNL54YdU0crEgbjChxvvsh0C6DN+bYRaedba0DSA46ZiZD9/DJjsqSoA4tYeWAaVCc9ExQC+2WFUFJBM7AZAUpYqcT5vuUv1oRDaSZ4rR7JXvC3pbyutPrY1uA3QTBMseC8l9UQYfQV/t5whvuS1uXVP2PWR2NJOon+vnL9kUk/Iy9L951W/YhPiORHbODUXVEC3Hf+q3TW7Vly1KyU32tD+eDlvBoMR9R8vwPI/QpVHV9V9bLGbFWPUglnQZMAaL46l0eh9FZCjTlGigReOi9LnnvKw5xPjXSk6z2BC8DW4OugrsvbHEU2eQLW+kDNynKL0GZReX7E0UTpCYWzau+dZgHQZ1QhoOACoqDAv3RoAgqCaOZ8XLvxSkHwdX0OLHdSmGsr0bnUlRPCp9bIqSDERbsJNUJglEoih+sfXcE1431HpkTSYrlggH2MqNdx/d3tF9vUSqxEJ87Hf7w0QCvSzE04PyLIYGQZ925x8ziwgM0BDX4vcn5BYfrG6fADQTAueHRSJqwWMiHzzpjQGByR3dmw6kVYShx82eidsCJ7N5BoQrYSawiNfU7KUINdN+o8L8i0EqSdW9WOi5fexVgqWTBYFwjidJ5HoZEs97G9ua4mVRzFOhTAsEu5o0KqHms0VwUSkyQiuMgmGJkA7qeLUHcj0F2w3TsD9mKTexHGyRsZHdPcZDCJ0q6GXHC09mPS0SvXHa8I1hRYR5JodtDQKJ/ACbg62eP2FKx+4m6U/QZni5UWTqQNV5BzoL+A7SDTnnp8vxO+qZH5sJDl7GX2NP15l9jkX1E8zPdygKlmCPyMCFRJKxC1ubeSU1CuqKqMz8dfh6+lXk3dq2bkj9vn1QxEhDuM
kyTsBQcLhOjVre+mTqjjRMSPKhQAg37P03AjpKLnDyd7uHf3WOwGe3hTyD2tPmXOULXyDEIEnaGL26wveOApH0ddwB92cXsUOzGu2Qfjk1jfh6lZ1Iwham+ICjlLvVnrp95nS+nhtHQHIfnxvH1EYd7Ka3IJS226FlDF/Wy2yN7UGKdG+iWmiRO2m1awrIFpPCi/0jZzQUTBvOZcBgXJEKOA57x0awn8CIn6hIGCr5lY0YCaUF4JENw6FONr0Zyzi1gAQSJpooB83xC9f8nXn8S9eOUBihTey3DFWebFxPuG0rWeaMkXk9FZBmZM4lZY1KnV7BSqzaaXEOic9jKw90zqZVPKttrzvMUr8nNUqhDhpUTVM14azFJZJAbhsb3oSEm4t6tS0N6iFKhckUx7n5J9YXEebfCyoDZOcWslXpiwmnPEVRyaiTlcfycZKOIFkakxj+1sUtII8WG2mCINJLaCI3mL7aEW3IEJpSbT4dFRQV/SMOKONPuj5fncSsF+3O2Q2rdAGZIaajHoC6ZzHtho51Yge0wl16HuI7XxwkhEw72KjAaoCrHn4CrLJ0oR2zQYbIuV9UeNQ78bot2ZCmFJKGa5LlIm1woeRzNsqAo9H+H6+4TNHUO037ubYb8srgTY1jyvXwQmv3+S0k5cEef00IMit0IA2GFxkoXm17tAmkulpK464dw/DQZ+Wl58HKCNZedmEg+9oa7C8iLvdltmfPWvyIhq54UuCZ+Q4Lab0kfJWmv9tIgg4HTK0cdqbqqTY/DbExjQ/vclJIqAqP4rDOKg3nwHJpDFPtRJfQJN015ymz8EG6Y/I/pNFJtHjd71A6fBp0UH1VPmtGIsk6jKMe64VjUQcAz6OH8Unv5czhSKRvl/LCAbpdjtQ1n+1B5n6siUTyYV3+oexDyNckI9tP0PZDWlxkQ2COpGaPSTC3UUebMmy89t0rS3pREU9CkiXzdTtW2iqVUdnaVzUbbLw1KsJs8yCrSBmae3JeFa5J4lJBC+XmGDGw5I3UqnrOY87m9ksx5N30W3MICMkkK548SU/niARTTd1tVl4bJbx0us1F4Y9sjiLCLDY6f0siYX8jacok/JWtMw0kdgSGw7tBN3fMpeIpGfAU4VEgYpLnRiM+R6I6I1+8S3ciObmqIXYGEtAvOEXAvGzZ/qknP1W/Xtbzyz7RMR/WlBkQodsqU9/GfHogfHRS1NEYos4nx3vB8vLRxulwmEmLGoDrb0qytBXuvOVT9Gep6ICO28nF+NQ1iR4PI53rG2HRRRUwYmCPymm996iPfR3PR4sfFLO6HETsdC9rFfsChg3+l2vDp9sMU4jsx9tvtpz5t0NsHLV7qXSTyjZ0zl+tE/iHG0JtyXJBGDZMCeYEgVJV/kBPKOR2cjGluizgYJPuPcOgJzzN+FYh2Hcq+lFAJgM4XBclVCbUZEXh+9YGR//MWLSQCdikMzZZchWo/u6xFIeJMMaJLveGgfD1MunjaCfbTty0SmLdcIG3SGAToIGyo5ExiNgQ9W/1JmbfX7z7X5jyPxYaYyYtbbpLA75UZo9iVYRgezeWGnBmhBX5gAzblyIjNUFEB3TVr3gr5CqCIVfWJITQ3Mwt69zwaKOxY3xhaPgnVh7XlZd3mXRcJHNO8YuSWFKM/eYxBtJqwKszq4S1LlYGS8m61rEfcA0YarLWYU7LloDICV7Lv6nGDFxihpxSPQfeDNvUMRbfA9B3tai4fUF1yNeoQ0nY/FCrkM3yBufCAVCxFtH9tU3uxA6XmQUODpjA1HXdQ2CLxxXBIACl332XsVNl9FkRa1HapPngbfXFGqgUWEnnp0PJY1GJKgwqIPLCknZYJQS2kPaW5SSBL/AAUWnPaVQIVDw3XjNhN4wXCNT5YdkrBto4m3GNZkE3ZeENFoOT7v04dHmOj+jP2OClnjZTugMyayVAknTUXjyxle68SwoYqyANyIRrGEF0T0hPI/zTxQLv/OVD3lI7zHyiAJYeDC6dTX
eAfmu56Nt03UH8sEBnFmODSZxO2m5bx5Gi4vgRD4cylor00mPydIzNFTadlRiV9AGqMhOihSZJIJoL2rou7etpBz3hse9mbpHTbBLnvNBx6Aa4z1kTmZ1xx4+qY5fUM/GsVfdQjuKvV7yfRaSIdpKPh/XS5nJB0vxjOQAiGHXgrV5fgF/pfcyCRTtqD3nAD6NTgyeMAqEsM53fbvOckLXK7kJc9+MHwHCL9GXJMCmfNRWRvC6kq1EqwoGsT6azxkcowstGeuEM/lK2LmAHvrjO6fBhq2In0KP+Y1UAPdQ8FtZuSAmOoIaz0CYzi26mZQEW7sc5mIbvdeXYUT5jbDmVg/7/0muNqmKg6S6qCePRfh50LbvszAnkqF72kdK7sh/4dpgSt6V4FSXhIjIhGWovSBJfjs1cKbhlgTc0wtv8h2/Y//JZEe7sLrtL4J1TpUhEor5BKfAyEYctV1HdJWrEhoXoGdaklwTtRcyFSewJvZtlXuGDydiyfWWsvhjXPQWy/b0DO4Nm0/LHsukJS+R9c0nhLHwpW4d1BEvQBSoAq6Cr04o1lqaq3vQULkEHaQToRa+eX2QV1fHBFhJhPbSiGJjaLBs7zzC4Dy/0BAnM70sg7msJdnnotE08aCEZgaMvN4Q7NFfD4XEAH1RNscjPV5ijs6W7TxtsvLNaXsI1Bslu+trAV/K0NFl9U5B/uzCqyGoAURBHmGZ4D2NnWDbezcWlpMQHYM2AEG1FxcWtEsNWm+c6j9ljdptXal00Bw2eJfT865B2oSm67RoYHAwy7qAtEhkmCZ/bHZ6yCY7oRpr30emq5zZvd3skiYwjpfcQC3Ut3kidYJl6Eg303kqjrV8LK/fzyoo/EZzpzF+jD3yY1FocTKgoFipb6Gx0pBbmxuwxVFul+VSRuKOt7XRuv4VASFgr+lMUptb0wzdZE+YNZ2T6/nVVe6i8+/EI7OlpwUEmubFvpJMONmzv1LgT5qlwOHKvkiOIgVPnBMf1iKQvXjMIxsbs49tufRmixkzqmPZQd4AFARmtRX2Itc1cd37XG5XtV0UEGHqw1ZTFW/w6YLRL4bholMqbd/pgOqujuJo4tU4V90BeeH5TCxb8YO8uaQRwUReXYtZNiEEXuJJ3pP1gLb7DkeJI28FZVwv3oaT7zNM4uD9kD9gJWlQPHKk3DEywqfAQnjHnbZmQCJRXJJbrCpoDvd5FUdc6n484A1N8+2vUQkFjvRM/+2jsnZ1FtpUOame/Y9oSDYOAjgzrxaoZLNavbRIhPif16dPUpLGzuWbiGFOrYXxE2yWHTGzBEb4PoRnEATKlwFTneLGivwjo+9LAauGHi7G64QkMYyws7YwZS9yW3oB2LgFen4/EvdbXFsDViZziwXD7Yw/UMhyvMIBICIzx1PiVHkvavwOkxo5acbm/z1QrJCd/Kzoq6L4epoFzBfcmgMN16PlxUyZQy1dpEtZcdQd/qAw3GqTuvcmiigqaPSQ5IKenEuhHgcJRi9qavJHMdlM0Zs0fW4zA2+YPei9089g2/Mdf6j1O99dhOCm6i2gIiSjzNpT/HEKWMzjZqFTH6VFyBpQ/mH1EmSI69xBs/drMs1fGtKyO1xI9vTh04sRd5M12xxeU++a8iUQaA8x44oKVrpBvB8DoBr56JZPQBbwhS47aHuWz/388Mp95aAV+5CYzoCHZ7fcf9DMP7IHGdTPrfVYWMkGDyNsmsFr/7vNQQVeNrNEt2JrwUNYCwWo/XfVh3myKfj3G1iTdaw8b/9CQJ/2vvH/EVTG6W6omvMNQzq69PPNfC+Cdf+Wg2FKHWBd8qBI1tEqs9Kp2EnhxfkZK+GgxpMf3Dcwjj3NQ86rXqela7bfwgdrMxW2ncw/oqVx0rzBXyazcyCou3vpnc8oZDi9x1hL9EYeRilnxsNI0I0ikXqv/XU7gQmGc1flwog+aK/oTkb2sosyf6/mPqzQ7zzq/Fhdt0VlVp719BMMV1JdXFE+qz5Rl6br9b5
EEU78DyBBSo948HL1QVyvfO5A+Y6KlINSXFNiGcVFj8WaAF6l61gc3CTtsuQULu5KQEcXU4jeR0Gg9Lljz9Wq+PCGvjR9BZ3RTzRKoYjfs+5F1l+Rk7B32X23ZxZJdAFf+wpapAgEM2uUDCNHFmDyN5hFQwjIgThCFhmH8y43lJFuMV+p4hfGTyX4E5O4DS625Rl4SxowgjFvzKCT4dC/6Vp5cIW0dY6OVXZzgETrj1j+TL6IKl/Ctip295oilmeJlNXpZ6js0X2ZnnfTAMM56L0fCLtsPPXgSSAx82YQrdDpShjY7CxdPd3pYYbNIpAgotxEVB2icwDMRUwVsuBYEB9OprDIeFNt6VeZcIGJe9tJkQK+QInBSakYNFyHMKJNWHuBEmeSTo8leV09BuF0AMWNqZbKe+viO4I5a5U4uxPPjklmnIlwoTl2GjVR+QNUxI1ZqPqBs0OWhPCRpvi+VshkKxBpfescjxBimms1fG1rXw8a6sZmw2h/WNVlpdTK2HX1UI8eB3Qq3Bb5xF+2Gt3TjwjmY0akezTqseSYYK6S2ieOq+C9qEn0f+Jj2kM3BwIm0MgQeC+UW3vBJgLo6fqmgSkJiw8voU+zvqT6v/7PfsdqT0Yn5PBWZYo/gMrECK4SKm3FuCu1XminVXFcR+733KLwLmoY06CaaDx53Bnt0ssAp4ERbzUb3cXXm8as3MkLD3kLdoC/NAycc2xzaZuob4lFlvYO2K8YOExVX50ZkzO3kiFjHQstyuFyeD6qU9IWuBI1V7ET9X/mZ6RCEO/Zm/++cH8wXMWBRAp8T8E5p+yi2EipfA7LUfEPMc7eNlqvf/NW5cCOXuvP6NRGmmgtsPpOpULqF+If1we73zsmIAhwbzIrypr/YhwNU1F8EOIA1WBwA3/Fqm6EDeKOlRkt36RnHInZslTB3UvKT9fvmCT+Bfi7p47x3jjapd8EFcKBiUEKqMFFfYtVgpkDjcDBWfwCwB5fVmDb+7HUY1484VdJ0GMPpZTLvqFUdzmLHIcoOukXbW6HhWprP5YeTKALzCphJpQq0YiVpHSfS6Nik7eh+YXh4S0KSDBBXuoNR0zfa1Y/kpp44IiAFzqXmBSxvBik5HcU+X9+ow10lPOet2LknL+woBDoLyk6f/kMHMLTM6ZWxCXiDQGKXgw/cAmR22sAGZ2qXbaphB2Cp9ckCeVeJPxF5dS6OUcs9dz9EU/kkEdpUVYoUIlQuLiRmLb9aLT2RjsSzqYADqqH6Sb0F4kISO9qXfYKaxRBWPPcoMFMuvmNdk6++StNkB5PTWs+ERpHulCRKf+U0=', '__EVENTTARGET': 'MoreInfoList1$Pager', '__EVENTARGUMENT': page } url = 'http://ncztb.nc.gov.cn/nczbw/jyxx/{}/MoreInfo.aspx'.format(categoryId) response = requests.post(url=url, headers=self.headers, params=params, data=data, cookies=self.cookies).content.decode('utf-8') selector = etree.HTML(response) except Exception as e: print('load_get error:{}'.format(e)) # time.sleep(3) # self.load_get(categoryId, types, page) else: print('第{}页'.format(page)) # div_ele_li = selector.xpath('//ul[@class="ewb-right-item"]/li') url_li = selector.xpath('//table[@id="MoreInfoList1_DataGrid1"]/tr/td[2]/a/@href') # for div_ele in div_ele_li: for url in url_li: # div = 
etree.tostring(div_ele, encoding="utf-8", pretty_print=True, method="html").decode('utf-8') urls = 'http://ncztb.nc.gov.cn' + url # self.load_get_html(urls) if not self.rq.in_rset(urls): self.rq.add_to_rset(urls) self.rq.pull_to_rlist(urls) def init(self): count = 2 while self.is_running(): if self.rq.r_len() <= count: count = 1 try: spawns = [gevent.spawn(self.load_get_html, self.rq.get_to_rlist()) for i in range(count)] gevent.joinall(spawns) except Exception as e: print(e) def run(self): # print(os.getppid()) threading.Thread(target=self.init).start() flag = 2 task_li = [ {'categoryId':'002001/002001002', 'types':'002001002','all_page': flag}, {'categoryId':'002001/002001004', 'types':'002001004','all_page': flag}, {'categoryId':'002001/002001005', 'types':'002001005','all_page': flag}, {'categoryId':'002002/002002002', 'types':'002002002','all_page': flag}, {'categoryId':'002002/002002005', 'types':'002002005','all_page': flag}, {'categoryId':'002003/002003001', 'types':'002003001','all_page': flag}, {'categoryId':'002003/002003004', 'types':'002003004','all_page': flag}, {'categoryId':'002009/002009001', 'types':'002009001','all_page': flag}, {'categoryId':'002009/002009004', 'types':'002009004','all_page': flag}, {'categoryId':'002004/002004001', 'types':'002004001','all_page': flag}, {'categoryId':'002004/002004002', 'types':'002004002','all_page': flag}, {'categoryId':'002004/002004003', 'types':'002004003','all_page': flag}, {'categoryId':'002004/002004004', 'types':'002004004','all_page': flag}, {'categoryId':'002004/002004005', 'types':'002004005','all_page': flag}, {'categoryId':'002005/002005002', 'types':'002005002','all_page': flag}, {'categoryId':'002010/002010001', 'types':'002010001','all_page': flag}, {'categoryId':'002010/002010002', 'types':'002010002','all_page': flag}, {'categoryId':'002010/002010004', 'types':'002010004','all_page': flag}, ] count = 1 for task in task_li: for page in range(1, task['all_page'] + 1, count): try: categoryId = 
task['categoryId'] types = task['types'] # self.load_get(categoryId, page) spawns = [gevent.spawn(self.load_get, categoryId, types, page + i) for i in range(count)] gevent.joinall(spawns) # print('第{}页'.format(page)) except Exception as e: print(e) if self.rq.r_len() > 10: threading.Thread(target=self.init).start() def main(self): self.run()
class GovBuy(object):
    '''Crawler for the Hangzhou government procurement site (杭州政府采购网).

    Producer/consumer layout: ``load_get`` POSTs list pages and pushes detail
    URLs into a Redis-backed queue (with a Redis set for de-duplication);
    ``load_get_html`` pops URLs, parses the detail page and stores one document
    per notice into MongoDB via ``StorageSetting``.
    '''

    def __init__(self):
        name = 'hangzhou_cg_hzft_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        # Browser-like headers; Referer/Origin mimic a real form submission
        # (assumed required by the site -- kept exactly as recorded).
        self.headers = {
            'Cache-Control': 'max-age=0',
            'Origin': 'http://cg.hzft.gov.cn',
            'Upgrade-Insecure-Requests': '1',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'http://cg.hzft.gov.cn/www/noticelist.do',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
        }
        self.session = requests.session()
        # Redis work queue (dblist) + de-duplication set (dbset).
        self.rq = Rdis_Queue(host='localhost', dblist='hangzhou_list1',
                             dbset='hangzhou_set1')

    def is_running(self):
        '''Return False once the queue is drained but the dedup set is non-empty
        (i.e. work was produced and has all been consumed); True otherwise.'''
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        '''Return the hex MD5 digest of *sign_str* (used as the Mongo ``_id``).'''
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        '''Current local time formatted as ``YYYY-MM-DD HH:MM:SS``.'''
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        '''Persist one result document, then poke the running-state check.'''
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        '''Best-effort "province-city" extraction from *strs* via ``transform``.

        Falls back to *pro* when no area is found; returns None when the
        lookup itself fails (behavior preserved from the original).
        '''
        try:
            df = transform([strs], umap={})
            area_str = re.sub(r'省|市', '-',
                              re.sub(r'省市区0', '',
                                     re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            # transform unavailable/failed -> implicit None, as before.
            return None
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get_html(self, url):
        '''Fetch one detail page, extract its fields and save to Mongo.'''
        try:
            response = requests.get(url=url, headers=self.headers).text
            selector = etree.HTML(response)
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
            return
        title = selector.xpath('//div[@class="detail_con"]/h1/text()')
        if title:
            title = title[0]
            status_match = re.search(r'[\u4e00-\u9fa5]{2}公告$', title)
            status = status_match.group() if status_match else '公告'
        else:
            title = None
            status = '公告'
        _id = self.hash_to_md5(url)
        publish_date = selector.xpath('//div[@class="content_about"]//text()')
        if publish_date:
            # FIX: guard .group() -- the original raised AttributeError when
            # the page carried no YYYY-MM-DD date.
            date_match = re.search(r'(\d{4}\-\d+\-\d+)', ''.join(publish_date))
            publish_date = date_match.group() if date_match else None
        else:
            publish_date = None
        area_name = '杭州'
        source = 'http://cg.hzft.gov.cn/'
        # FIX: explicit parser (implicit parser selection is nondeterministic
        # across environments and warns in bs4).
        soup = BeautifulSoup(response, 'html.parser')
        content_html = soup.find(class_='detail_con')
        retult_dict = dict()
        retult_dict['_id'] = _id
        retult_dict['title'] = title
        retult_dict['status'] = status
        retult_dict['area_name'] = area_name
        retult_dict['source'] = source
        retult_dict['publish_date'] = publish_date
        retult_dict['detail_url'] = url
        retult_dict['content_html'] = str(content_html)
        retult_dict['create_time'] = self.now_time()
        retult_dict['zh_name'] = '杭州市政府采购网'
        retult_dict['en_name'] = 'Hangzhou Government Procurement'
        print('列表长度为={}'.format(self.rq.r_len()))
        self.save_to_mongo(retult_dict)

    def load_get(self, data, _retries=3):
        '''POST one list page and enqueue every detail URL found.

        FIX: the original recursed unconditionally on failure, so a dead
        endpoint meant infinite recursion; *_retries* now bounds re-attempts
        (backward-compatible default parameter).
        '''
        try:
            url = 'http://cg.hzft.gov.cn/www/noticelist.do'
            response = requests.post(url=url, headers=self.headers,
                                     data=data).text
            selector = etree.HTML(response)
        except Exception:
            print('load_post error')
            if _retries > 0:
                self.load_get(data, _retries - 1)
            return
        url_li = selector.xpath('//ul[@class="c_list_item"]/li/a/@href')
        print(url_li)
        for url in url_li:
            urls = 'http://cg.hzft.gov.cn/' + url
            if not self.rq.in_rset(urls):
                self.rq.add_to_rset(urls)
                self.rq.pull_to_rlist(urls)

    def init(self):
        '''Consumer loop: drain the Redis queue with small gevent batches.'''
        count = 8
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1  # shrink batch near the end so greenlets get work
            try:
                spawns = [gevent.spawn(self.load_get_html,
                                       self.rq.get_to_rlist())
                          for i in range(count)]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        '''Produce list pages for each region/noticetype task, consuming in
        a background thread.'''
        threading.Thread(target=self.init).start()
        task_li = [
            {'regionguid': '330100',
             'noticetype': '3,3001,3002,3008,3009,3011,3014,4001,4002',
             'all_page': 1},
            {'regionguid': '3', 'noticetype': '5,6', 'all_page': 1},
            {'regionguid': '', 'noticetype': '1,3012', 'all_page': 1},
        ]
        count = 1
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                # Keys are literal struts-style parameter names expected by
                # the endpoint -- do not "fix" the bracketed quoting.
                data = {
                    'page.pageNum': page,
                    "parameters['regionguid']": task['regionguid'],
                    "parameters['noticetype']": task['noticetype'],
                    "parameters['title']": ''
                }
                try:
                    self.load_get(data)
                    print('第{}页'.format(page))
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    '''Crawler for the Suzhou government procurement site (苏州政府采购网).

    ``load_get`` queries the JSON search endpoint and enqueues project ids
    into a Redis queue/dedup-set pair; ``load_get_html`` renders each project
    detail page and stores one document per notice into MongoDB.
    '''

    def __init__(self):
        name = 'suzhou_zfcg_suzhou_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        # Recorded browser headers; X-Requested-With marks the AJAX endpoint.
        self.headers = {
            'Origin': 'http://www.zfcg.suzhou.gov.cn',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Referer': 'http://www.zfcg.suzhou.gov.cn/html/search.shtml?title=&choose=&projectType=0&zbCode=&appcode=',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive',
        }
        self.session = requests.session()
        # Redis work queue (dblist) + de-duplication set (dbset).
        self.rq = Rdis_Queue(host='localhost', dblist='suzhou_list1',
                             dbset='suzhou_set1')

    def is_running(self):
        '''False once the queue is drained and the dedup set shows work was
        produced; True otherwise.'''
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        '''Return the hex MD5 digest of *sign_str* (used as the Mongo ``_id``).'''
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        '''Current local time formatted as ``YYYY-MM-DD HH:MM:SS``.'''
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        '''Persist one result document, then poke the running-state check.'''
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        '''Best-effort "province-city" extraction from *strs* via ``transform``;
        falls back to *pro*, or None when the lookup fails.'''
        try:
            df = transform([strs], umap={})
            area_str = re.sub(r'省|市', '-',
                              re.sub(r'省市区0', '',
                                     re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None  # preserves the original bare-except fall-through
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get_html(self, pid):
        '''Fetch the detail page for project id *pid*, parse and save it.'''
        if pid is None:  # queue pop can yield None when empty
            return
        try:
            url = 'http://www.zfcg.suzhou.gov.cn/html/project/' + pid + '.shtml'
            response = requests.get(url=url,
                                    headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            print('laod_get_html error:{}'.format(e))
            return
        title = selector.xpath('//div[@class="M_title"]/text()')
        if title:
            title = re.sub(r'\r|\n|\s', '', title[0])
            status_match = re.search(r'[\u4e00-\u9fa5]{2}公告$', title)
            status = status_match.group() if status_match else '公告'
        else:
            title = None
            status = '公告'
        _id = self.hash_to_md5(url)
        publish_date = selector.xpath('//div[@class="date"]/span/text()')
        if publish_date:
            # FIX: guard .group() -- original crashed when no date matched.
            date_match = re.search(r'(\d{4}\-\d+\-\d{1,2})',
                                   ''.join(publish_date))
            publish_date = date_match.group() if date_match else None
        else:
            publish_date = None
        area_name = '江苏-苏州'
        source = 'http://www.zfcg.suzhou.gov.cn/'
        # FIX: guard the [0] index -- pages without the #tab1 div crashed the
        # greenlet with IndexError and the notice was silently lost.
        table_eles = selector.xpath('//div[@id="tab1"]')
        if not table_eles:
            print('laod_get_html error:no tab1 content {}'.format(url))
            return
        content_html = etree.tostring(table_eles[0], encoding="utf-8",
                                      pretty_print=True,
                                      method="html").decode('utf-8')
        retult_dict = dict()
        retult_dict['_id'] = _id
        retult_dict['title'] = title
        retult_dict['status'] = status
        retult_dict['area_name'] = area_name
        retult_dict['source'] = source
        retult_dict['publish_date'] = publish_date
        retult_dict['detail_url'] = url
        retult_dict['content_html'] = str(content_html)
        retult_dict['create_time'] = self.now_time()
        retult_dict['zh_name'] = '苏州市政府采购网'
        retult_dict['en_name'] = 'Suzhou City Government Procurement'
        print('列表长度为={}'.format(self.rq.r_len()))
        self.save_to_mongo(retult_dict)

    def load_get(self, types, page, _retries=3):
        '''Query one search-result page and enqueue each PROJECTID.

        FIX: the original retried by unbounded recursion on any failure;
        *_retries* now caps re-attempts (backward-compatible default).
        '''
        try:
            data = [
                ('title', ''),
                ('choose', ''),
                ('type', types),
                ('zbCode', ''),
                ('appcode', ''),
                ('page', page),
                ('rows', '30'),
            ]
            url = 'http://www.zfcg.suzhou.gov.cn/content/searchContents.action'
            response = requests.post(url=url, headers=self.headers,
                                     data=data).json()
        except Exception:
            print('load_post error')
            if _retries > 0:
                self.load_get(types, page, _retries - 1)
            return
        print('第{}页'.format(page))
        response_li = response['rows']
        if not response_li:
            return
        for project_id in response_li:
            pid = project_id['PROJECTID']
            if not self.rq.in_rset(pid):
                self.rq.add_to_rset(pid)
                self.rq.pull_to_rlist(pid)

    def init(self):
        '''Consumer loop: drain the Redis queue with small gevent batches.'''
        count = 3
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1  # shrink batch near the end of the queue
            try:
                spawns = [gevent.spawn(self.load_get_html,
                                       self.rq.get_to_rlist())
                          for i in range(count)]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        '''Produce search pages for each notice type, consuming in a
        background thread.'''
        threading.Thread(target=self.init).start()
        task_li = [
            {'type': '0', 'all_page': 2},
            {'type': '1', 'all_page': 2},
            {'type': '2', 'all_page': 2},
        ]
        count = 3
        for task in task_li:
            for page in range(1, task['all_page'] + 1, count):
                try:
                    types = task['type']
                    spawns = [gevent.spawn(self.load_get, types, page + i)
                              for i in range(count)]
                    gevent.joinall(spawns)
                except Exception as e:
                    print(e)
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()
class GovBuy(object):
    '''Crawler for the Huhhot government procurement site (呼和浩特政府采购网).

    ``load_get`` POSTs the paged message endpoint (HTML fragment under the
    JSON key ``'0'``) and enqueues detail URLs into a Redis queue/dedup set;
    ``load_get_html`` parses each detail page and stores it into MongoDB.
    '''

    def __init__(self):
        name = 'huheaote_hhgp_gov_cn'
        self.coll = StorageSetting(name)
        self.collection = self.coll.find_collection
        # Recorded browser headers; Content-Length 0 because the POST body
        # is empty (parameters go in the query string).
        self.headers = {
            'Origin': 'http://www.hhgp.gov.cn',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh,zh-CN;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': 'application/json, text/javascript, */*',
            'Referer': 'http://www.hhgp.gov.cn/huShi_web_login',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive',
            'Content-Length': '0',
        }
        # Redis work queue (dblist) + de-duplication set (dbset).
        self.rq = Rdis_Queue(host='localhost', dblist='huhehaote_list1',
                             dbset='huhehaote_set1')

    def is_running(self):
        '''False once the queue is drained and the dedup set shows work was
        produced; True otherwise.'''
        return not (self.rq.r_len() == 0 and len(self.rq.rset_info()) > 0)

    def hash_to_md5(self, sign_str):
        '''Return the hex MD5 digest of *sign_str* (used as the Mongo ``_id``).'''
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def now_time(self):
        '''Current local time formatted as ``YYYY-MM-DD HH:MM:SS``.'''
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def save_to_mongo(self, result_dic):
        '''Persist one result document, then poke the running-state check.'''
        self.coll.saves(result_dic)
        self.is_running()

    def get_area(self, pro, strs):
        '''Best-effort "province-city" extraction from *strs* via ``transform``;
        falls back to *pro*, or None when the lookup fails.'''
        try:
            df = transform([strs], umap={})
            area_str = re.sub(r'省|市', '-',
                              re.sub(r'省市区0', '',
                                     re.sub(r'/r|/n|\s', '', str(df))))
        except Exception:
            return None  # preserves the original bare-except fall-through
        area_li = [pro] if area_str == '' else area_str.split('-')
        if len(area_li) >= 2 and area_li[1] != '':
            return '-'.join(area_li[:2])
        return area_li[0]

    def load_get(self, params, _retries=3):
        '''POST one list page and enqueue every detail URL found.

        The endpoint returns JSON whose key ``'0'`` holds an HTML fragment;
        *_retries* bounds re-attempts on failure (the original gave up after
        one silent print, which is kept as the final fallback).
        '''
        try:
            url = 'http://www.hhgp.gov.cn/huShi_web_login/showAllMessage'
            response = requests.post(url=url, headers=self.headers,
                                     params=params).json()
            response_str = response['0']
            selector = etree.HTML(response_str)
        except Exception:
            print('load_post error')
            if _retries > 0:
                self.load_get(params, _retries - 1)
            return
        url_li = selector.xpath('//li/span[1]/a/@href')
        for url in url_li:
            url = 'http://www.hhgp.gov.cn' + url
            if not self.rq.in_rset(url):
                self.rq.add_to_rset(url)
                self.rq.pull_to_rlist(url)

    def load_get_html(self, url):
        '''Fetch one detail page, extract its fields and save to Mongo.'''
        try:
            response = requests.get(url=url,
                                    headers=self.headers).content.decode('utf-8')
            selector = etree.HTML(response)
        except Exception as e:
            # FIX: original was 'laod_get_html error:'.format(e) -- no {}
            # placeholder, so the exception text was silently dropped.
            print('laod_get_html error:{}'.format(e))
            return
        _id = self.hash_to_md5(url)
        title = selector.xpath('//*[@id="content"]/div/div[2]/div/div/h1/text()')
        if title:
            title = title[0]
            status_match = re.search(r'[\u4e00-\u9fa5]{2}公告$', title)
            status = status_match.group() if status_match else '公告'
        else:
            title = None
            status = '公告'
        publish_date = selector.xpath('//*[@id="content"]/div/div[2]/div/div/i/text()')
        if publish_date:
            # FIX: guard .group() -- original crashed when no date matched.
            date_match = re.search(r'(\d+\-\d+\-\d+)', publish_date[0])
            publish_date = date_match.group() if date_match else None
        else:
            publish_date = None
        # FIX: explicit parser (implicit selection is nondeterministic and
        # warns in bs4).
        soup = BeautifulSoup(response, 'html.parser')
        content_html = soup.find(class_='content')
        source = 'http://www.hhgp.gov.cn/'
        area_name = self.get_area('呼和浩特', title)
        retult_dict = dict()
        retult_dict['_id'] = _id
        retult_dict['title'] = title
        retult_dict['status'] = status
        retult_dict['publish_date'] = publish_date
        retult_dict['source'] = source
        retult_dict['area_name'] = area_name
        retult_dict['detail_url'] = url
        retult_dict['content_html'] = str(content_html)
        retult_dict['create_time'] = self.now_time()
        retult_dict['zh_name'] = '呼和浩特市政府采购网 '
        retult_dict['en_name'] = 'Huhhot City Government Procurement'
        print('列表长度为={}'.format(self.rq.r_len()))
        self.save_to_mongo(retult_dict)

    def init(self):
        '''Consumer loop: drain the Redis queue with small gevent batches.'''
        count = 6
        while self.is_running():
            if self.rq.r_len() <= count:
                count = 1  # shrink batch near the end of the queue
            try:
                spawns = [gevent.spawn(self.load_get_html,
                                       self.rq.get_to_rlist())
                          for i in range(count)]
                gevent.joinall(spawns)
            except Exception as e:
                print(e)

    def run(self):
        '''Produce one list page per category code, consuming in a
        background thread.'''
        threading.Thread(target=self.init).start()
        # Incremental crawl: 1 page per category. Historical full-crawl page
        # counts were far larger (e.g. 265.266.271 had 217 pages).
        task_li = [
            {'code': '265.266.304', 'all_page': 1},
            {'code': '265.266.269', 'all_page': 1},
            {'code': '265.266.270', 'all_page': 1},
            {'code': '265.266.271', 'all_page': 1},
            {'code': '265.266.303', 'all_page': 1},
            {'code': '265.266.404', 'all_page': 1},
            {'code': '265.266.403', 'all_page': 1},
            {'code': '265.266.343', 'all_page': 1},
        ]
        for task in task_li:
            for page in range(1, task['all_page'] + 1):
                params = (
                    ('code', task['code']),
                    ('pageNo', str(page)),
                    ('check', '1'),
                )
                self.load_get(params)
                print('第{}页'.format(page))
        if self.rq.r_len() > 0:
            threading.Thread(target=self.init).start()

    def main(self):
        self.run()