def __init__(self):
    """Store the crawl configuration for 佛山市南海区人民政府网 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression`` and
    ``regularExpression02`` are resolved from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02

    # Source identity and bookkeeping.
    self.source_name = '佛山市南海区人民政府网'
    self.addr_id = '414'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://www.nanhai.gov.cn'
    self.headers = {
        'Host': 'www.nanhai.gov.cn',
        'Referer': 'http://www.nanhai.gov.cn/cms/html/nanhai/index.html',
    }

    # Extraction rules. The first three are relative XPath expressions;
    # 'content_rule' is a regular expression with one capture group.
    self.xpath_rule = {
        'title_rule': './td[2]/a/text()',
        'url_rule': './td[2]/a/@href',
        'web_time_rule': './td[3]/text()',
        'content_rule': r'id="ArticleBody">(.*?)<!DOCTYPE html',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    # A list is used instead of a set literal: a set has no defined
    # iteration order and would silently drop duplicate entries.
    self.start_urls = [
        # All categories are served by this single URL; 1251 pages in
        # total, roughly 1 page of new entries per day.
        ('招标公告', 'http://www.nanhai.gov.cn/cms/sites/nanhai/zwgk_zdly_page_right.jsp?ColumnID=11960&page={}', 3),
    ]
def __init__(self):
    """Store the crawl configuration for the www.crecgec.com source.

    ``get_city_dict``, ``category`` and ``regularExpression`` are resolved
    from the enclosing module.
    """
    # Request headers.
    self.headers = {
        'Host': 'www.crecgec.com',
        'Connection': 'keep-alive',
    }

    # City dictionary, tender categories and regex rules from the module.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression

    self.error_count = 0

    # Prefix used when building article URLs.
    self.article_url = 'http://www.crecgec.com/'

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Procurement notices: 995 pages in total, about 4 pages of new
        # entries per day.
        (
            '招标公告',
            'http://www.crecgec.com/forum.php?mod=forumdisplay&fid=2&sortid=12&filter=sortid&sortid=12&mcode=0001&page={}',
            3,
        ),
        # Competitive negotiation: 504 pages in total, about 3 pages of
        # new entries per day.
        (
            '招标公告',
            'http://www.crecgec.com/forum.php?mod=forumdisplay&fid=2&sortid=14&filter=sortid&sortid=14&page={}',
            3,
        ),
        # Result publicity: 1000 pages in total, about 3 pages of new
        # entries per day.
        # NOTE(review): this result listing is labelled '变更公告'
        # (change notice) -- confirm the label is intended.
        (
            '变更公告',
            'http://www.crecgec.com/forum.php?mod=forumdisplay&fid=2&sortid=13&sortid=13&filter=sortid&page={}',
            3,
        ),
    ]
def __init__(self):
    """Store the crawl configuration for the www.msggzy.org.cn source.

    ``get_city_dict``, ``category``, ``regularExpression`` and
    ``regularExpression02`` are resolved from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.category = category

    self.error_count = 0

    # Request configuration.
    self.govPurchase_baseUrl = 'http://www.msggzy.org.cn'
    self.headers = {
        'Host': 'www.msggzy.org.cn',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # --- Government procurement ---
        # Procurement notices: 5 pages in total, 1 page updated per day.
        ('招标公告', 'http://www.msggzy.org.cn/front/zfcg/002001/?Paging={}', 3),
        # Procurement change notices: 3 pages in total, same update rate.
        ('变更公告', 'http://www.msggzy.org.cn/front/zfcg/002002/?Paging={}', 3),
        # Procurement results: 6 pages in total, same update rate.
        ('招标结果', 'http://www.msggzy.org.cn/front/zfcg/002003/?Paging={}', 3),
        # --- Construction projects ---
        # Tender notices: 17 pages in total, 1 page updated per day.
        ('招标公告', 'http://www.msggzy.org.cn/front/jsgc/001002/?Paging={}', 3),
        # Winning-candidate publicity: 17 pages in total, about 1 page
        # per day.
        ('招标结果', 'http://www.msggzy.org.cn/front/jsgc/001013/?Paging={}', 3),
        # Tender results: 16 pages in total, about 1 page per day.
        ('招标结果', 'http://www.msggzy.org.cn/front/jsgc/001015/?Paging={}', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 六盘水市公共资源交易网 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '六盘水市公共资源交易网'
    self.addr_id = '425'
    self.error_count = 0

    # Request configuration; the base URL is left empty for this source.
    self.baseUrl = ''
    self.headers = {
        'Host': 'ggzy.gzlps.gov.cn',
        'Referer': 'http://ggzy.gzlps.gov.cn/jyxxzc/index.jhtml',
    }

    # Extraction rules. The first three are relative XPath expressions;
    # 'content_rule' is a regular expression with one capture group.
    self.xpath_rule = {
        'title_rule': './p[1]/a/text()',
        'url_rule': './p[1]/a/@href',
        'web_time_rule': './p[2]/text()',
        'content_rule': r'<div class="article">(.*?)<div class="footer">',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Government procurement: 252 pages in total, about 1 page of
        # new entries per day.
        ('招标公告', 'http://ggzy.gzlps.gov.cn/jyxxzc/index_{}.jhtml', 3),
        # Construction projects: 625 pages in total, about 1 page of new
        # entries per day.
        ('招标公告', 'http://ggzy.gzlps.gov.cn/jyxxgc/index_{}.jhtml', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 株洲市公共资源交易网 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression`` and
    ``regularExpression02`` are resolved from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.category = category

    # Source identity and bookkeeping.
    self.source_name = '株洲市公共资源交易网'
    self.addr_id = '431'
    self.error_count = 0

    # NOTE(review): this host differs from the zhuzhou.hnsggzy.com host
    # used by the headers and start URLs below -- confirm it is intended.
    self.govPurchase_baseUrl = 'http://www.dqgpc.gov.cn'
    self.baseUrl = ''

    # Extraction rules. 'list_page' selects the listing rows, the next
    # three are XPath expressions relative to a row, and 'content_rule'
    # is a regular expression with one capture group.
    self.xpath_rule = {
        'list_page': '//div[@class="article-content"]/ul/li',
        'title_rule': './div[1]/a//text()',
        'url_rule': './div[1]/a/@href',
        'web_time_rule': './div[1]//div/text()',
        'content_rule': r'<div class="content">(.*?)<span><a href="',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Government procurement (notices and results together):
        # 273 pages in total, 1 page updated per day.
        ('招标公告', 'http://zhuzhou.hnsggzy.com/jygkzfcg/index_{}.jhtml', 3),
        # Construction projects (notices and results together):
        # 200 pages in total, 1 page updated per day.
        ('招标公告', 'http://zhuzhou.hnsggzy.com/gczb/index_{}.jhtml', 3),
    ]

    self.headers = {
        'Host': 'zhuzhou.hnsggzy.com',
        'Referer': 'http://zhuzhou.hnsggzy.com/jygkzfcg/index_2.jhtml',
    }
def __init__(self):
    """Store the crawl configuration for 大理州公共资源交易电子服务系统.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '大理州公共资源交易电子服务系统'
    self.addr_id = '427'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'https://www.dlggzy.cn'
    self.headers = {
        'Host': 'www.dlggzy.cn',
        'Referer': 'https://www.dlggzy.cn/jyxx/zfcg/cggg',
    }

    # Extraction rules. All but 'content_rule' are relative XPath
    # expressions; 'content_rule' is a regular expression with one
    # capture group. The 'modify_*' and 'result_*' variants are
    # presumably for the change-notice and result listings -- confirm
    # against the parsing code.
    self.xpath_rule = {
        'title_rule': './td[3]/a/@title',
        'url_rule': './td[3]/a/@href',
        'web_time_rule': './td[4]/text()',
        'modify_url_rule': './td[4]/a/@href',
        'modify_title_rule': './td[4]/a/@title',
        'modify_web_time_rule': './td[5]//text()',
        'result_title_rule': './td[3]/@title',
        'content_rule': r'<div class="news-title">(.*?)<div class="foot row">',
    }

    # (category label, listing URL template, page count).
    # Here the third element equals the total page count noted for each
    # listing; it is presumably the number of pages to crawl -- confirm
    # against the caller.
    self.start_urls = [
        # Government procurement: notices 146 pages, change notices
        # 46 pages, result notices 121 pages; about 1 page per day each.
        ('招标公告', 'https://www.dlggzy.cn/jyxx/zfcg/cggg?currentPage={}&area=013&scrollValue=0', 146),
        ('变更公告', 'https://www.dlggzy.cn/jyxx/zfcg/gzsx?currentPage={}&area=013&scrollValue=0', 46),
        ('招标结果', 'https://www.dlggzy.cn/jyxx/zfcg/zbjggs?currentPage={}&area=013&scrollValue=0', 121),
        # Construction projects: tender notices 132 pages, change notices
        # 90 pages, bid-evaluation results 102 pages, award results
        # 146 pages; about 1 page per day each.
        ('招标公告', 'https://www.dlggzy.cn/jyxx/jsgcZbgg?currentPage={}&area=013&scrollValue=0', 132),
        ('变更公告', 'https://www.dlggzy.cn/jyxx/jsgcBgtz?currentPage={}&area=013&scrollValue=0', 90),
        ('招标结果', 'https://www.dlggzy.cn/jyxx/jsgcpbjggs?currentPage={}&area=013&scrollValue=0', 102),
        ('招标结果', 'https://www.dlggzy.cn/jyxx/jsgcZbjggs?currentPage={}&area=013&scrollValue=0', 146),
    ]
def __init__(self):
    """Store the crawl configuration for 绍兴公共资源交易网 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '绍兴公共资源交易网'
    self.addr_id = '401'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://ggb.sx.gov.cn'
    self.headers = {
        'Host': 'ggb.sx.gov.cn',
        'Referer': 'http://ggb.sx.gov.cn/',
    }

    # Only the article body is configured here; it is a regular
    # expression with one capture group.
    self.xpath_rule = {
        'content_rule': r'<meta name="ContentStart">(.*?)<meta name="ContentEnd">',
    }

    # (category label, listing URL template, page count).
    # Every template has two positional placeholders, ``startrecord``
    # then ``endrecord``, with ``perpage`` fixed at 15. The third tuple
    # element is presumably the number of pages to crawl -- confirm
    # against the caller.
    self.start_urls = [
        # Municipal government procurement: tender notices 174 pages,
        # award notices 146 pages, cancelled-tender notices 35 pages;
        # about one page of new entries per day each.
        ('招标公告', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518860&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
        ('招标结果', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518861&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
        ('招标结果', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518862&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
        # County-level government procurement: procurement notices
        # 151 pages, award notices 100 pages; about one page per day each.
        ('招标公告', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518895&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
        # This template previously read ``endrecord=90&perpage={}``, so
        # the second positional value landed in ``perpage`` and the end
        # record never advanced; it now matches the other listings.
        ('招标结果', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518896&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
        # County-level construction projects: tender notices 75 pages,
        # award publicity 70 pages, transaction results 43 pages; about
        # one page of new entries per day each.
        ('招标公告', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518891&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
        ('招标结果', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518892&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
        ('招标结果', 'http://ggb.sx.gov.cn/module/jpage/dataproxy.jsp?col=1&appid=1&webid=3003&path=%2F&columnid=1518893&sourceContentType=1&unitid=4685909&webname=%E7%BB%8D%E5%85%B4%E5%85%AC%E5%85%B1%E8%B5%84%E6%BA%90%E4%BA%A4%E6%98%93%E7%BD%91&permissiontype=0&startrecord={}&endrecord={}&perpage=15', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 合肥市政府信息公开网 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '合肥市政府信息公开网'
    self.addr_id = '402'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://zwgk.hefei.gov.cn'
    self.headers = {
        'Host': 'zwgk.hefei.gov.cn',
        'Referer': 'http://zwgk.hefei.gov.cn/zwgk/public/index.xp?doAction=zdlylist2&type=5&id=001000180002',
    }

    # Extraction rules. 'list_page' selects the listing tables, the next
    # three are XPath expressions relative to an entry, and
    # 'content_rule' is a regular expression with one capture group.
    self.xpath_rule = {
        'list_page': '//form[@name="form1"]/table/tr[3]//table',
        'title_rule': './/tr/td[2]/a/text()',
        'url_rule': './/tr/td[2]/a/@href',
        'web_time_rule': './/tr/td[3]//text()',
        'content_rule': r'style="font-weight:bold;">(.*?)<!-- GWD SHARE BEGIN 文章底部-->',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Government procurement: tender notices 1471 pages, award
        # publicity 1816 pages; about 2 and 3 pages per day respectively
        # (the original author notes 6 pages could be used).
        ('招标公告', 'http://zwgk.hefei.gov.cn/zwgk/public/index.xp?doAction=zdlylist2&type=5&id=001000180002&curPage={}', 3),
        ('招标结果', 'http://zwgk.hefei.gov.cn/zwgk/public/index.xp?doAction=zdlylist2&type=5&id=001000180003&curPage={}', 3),
        # Engineering tenders: tender notices 1342 pages, award publicity
        # 900 pages; about 3 pages per day each (the original author
        # notes 5 pages could be used).
        ('招标公告', 'http://zwgk.hefei.gov.cn/zwgk/public/index.xp?doAction=zdlylist2&type=5&id=001000190002&curPage={}', 3),
        ('招标结果', 'http://zwgk.hefei.gov.cn/zwgk/public/index.xp?doAction=zdlylist2&type=5&id=001000190003&curPage={}', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 安顺市全国公共资源交易平台.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '安顺市全国公共资源交易平台'
    self.addr_id = '425'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://www.ggzy.anshun.gov.cn'
    self.headers = {
        'Host': 'www.ggzy.anshun.gov.cn',
        'Referer': 'www.ggzy.anshun.gov.cn',
    }

    # Extraction rules. 'list_page' selects the listing rows, the next
    # three are XPath expressions relative to a row, and 'content_rule'
    # is a regular expression with one capture group.
    self.xpath_rule = {
        'list_page': '//div[@class="ewb-right-bd"]/ul/li',
        'title_rule': './div/a/text()',
        'url_rule': './div/a/@href',
        'web_time_rule': './span/text()',
        'content_rule': r'<div class="ewb-list-bd">(.*?)<!-- 分享 BEGIN -->',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Government procurement: tender notices 110 pages, transaction
        # results 87 pages, cancelled tenders 30 pages, qualification
        # review results 2 pages, clarifications 16 pages.
        ('招标公告', 'http://www.ggzy.anshun.gov.cn/jyxx/003002/003002001/{}.html', 3),
        ('招标结果', 'http://www.ggzy.anshun.gov.cn/jyxx/003002/003002002/{}.html', 3),
        ('招标结果', 'http://www.ggzy.anshun.gov.cn/jyxx/003002/003002003/{}.html', 3),
        ('招标结果', 'http://www.ggzy.anshun.gov.cn/jyxx/003002/003002004/{}.html', 3),
        ('招标公告', 'http://www.ggzy.anshun.gov.cn/jyxx/003002/003002005/{}.html', 3),
        # Construction projects: tender notices 396 pages, transaction
        # results 313 pages, cancelled tenders 42 pages.
        ('招标公告', 'http://www.ggzy.anshun.gov.cn/jyxx/003001/003001001/{}.html', 3),
        ('招标结果', 'http://www.ggzy.anshun.gov.cn/jyxx/003001/003001002/{}.html', 3),
        ('招标结果', 'http://www.ggzy.anshun.gov.cn/jyxx/003001/003001003/{}.html', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 锦州市公共资源交易管理办公室.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '锦州市公共资源交易管理办公室'
    self.addr_id = '411'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://www.jztb.gov.cn'
    self.headers = {
        'Host': 'www.jztb.gov.cn',
        'Referer': 'www.jztb.gov.cn',
    }

    # Extraction rules. The first three are relative XPath expressions;
    # 'content_rule' is a regular expression with one capture group.
    self.xpath_rule = {
        'title_rule': './/a/text()',
        'url_rule': './/a/@href',
        'web_time_rule': './span/text()',
        'content_rule': r'<div class="news-article">(.*?)<!-- footer -->',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Government procurement: notices 397 pages, results 258 pages,
        # changes 14 pages (about 1 page of updates each).
        ('招标公告', 'http://www.jztb.gov.cn/jyxx/077001/077001001/{}.html', 3),
        ('招标结果', 'http://www.jztb.gov.cn/jyxx/077001/077001002/{}.html', 3),
        ('变更公告', 'http://www.jztb.gov.cn/jyxx/077001/077001003/{}.html', 3),
        # Construction projects: tender notices 112 pages, winning
        # candidates 72 pages, award publicity 61 pages (about 1 page of
        # updates each).
        ('招标公告', 'http://www.jztb.gov.cn/jyxx/077002/077002001/{}.html', 3),
        ('招标结果', 'http://www.jztb.gov.cn/jyxx/077002/077002002/{}.html', 3),
        ('招标结果', 'http://www.jztb.gov.cn/jyxx/077002/077002003/{}.html', 3),
        # Medicine and medical-device procurement: notices and results,
        # 13 pages each.
        ('招标公告', 'http://www.jztb.gov.cn/jyxx/077005/077005001/{}.html', 3),
        ('招标结果', 'http://www.jztb.gov.cn/jyxx/077005/077005002/{}.html', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 大连市公共资源 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression`` and
    ``regularExpression02`` are resolved from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02

    # Source identity and bookkeeping.
    self.source_name = '大连市公共资源'
    self.addr_id = '411'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://ggzyjy.dl.gov.cn'
    self.headers = {
        'Host': 'ggzyjy.dl.gov.cn',
        'Referer': 'https://www.ggzyjy.dl.gov.cn',
    }

    # Default extraction rules. The first three are relative XPath
    # expressions; 'content_rule' is a regular expression with one
    # capture group.
    self.xpath_rule = {
        'title_rule': './td[2]/a/text()',
        'url_rule': './td[2]/a/@href',
        'web_time_rule': './td[4]/text()',
        'content_rule': r'id="mainContent">(.*?)<!-- footer -->',
    }

    # Alternative rule set with the same keys; presumably applied to the
    # health-procurement listings below -- confirm against the parser.
    self.health_xpath = {
        'title_rule': './div/a/text()',
        'url_rule': './div/a/@href',
        'web_time_rule': './span/text()',
        'content_rule': r'id="mainContent">(.*?)<!-- footer -->',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Government procurement.
        ('招标公告', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071002/071002001/?pageing={}', 3),
        ('招标结果', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071002/071002003/?pageing={}', 3),
        ('招标公告', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071002/071002005/?pageing={}', 3),
        # Construction projects.
        ('招标公告', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071001/071001001/?pageing={}', 3),
        ('招标结果', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071001/071001002/?pageing={}', 3),
        ('招标结果', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071001/071001003/?pageing={}', 3),
        # Health and family-planning procurement.
        ('招标公告', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071008/071008001/?pageing={}', 3),
        ('招标结果', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071008/071008003/?pageing={}', 3),
        ('招标公告', 'http://ggzyjy.dl.gov.cn/TPFront/jyxx/071008/071008005/?pageing={}', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 新乡市政府采购网 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '新乡市政府采购网'
    self.addr_id = '429'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://xinxiang.hngp.gov.cn'
    self.headers = {
        'Host': 'xinxiang.hngp.gov.cn',
        'Referer': 'http://xinxiang.hngp.gov.cn/xinxiang/content?infoId=1548325966768822&channelCode=H680201&bz=0',
    }

    # Extraction rules. The first three are relative XPath expressions;
    # 'content_rule' is a regular expression with one capture group.
    self.xpath_rule = {
        'title_rule': './a/text()',
        'url_rule': './a/@href',
        'web_time_rule': './span/text()',
        'content_rule': r'</P>(.*?)<!--EndFragment-->',
    }

    # Article URL templates, each with three positional placeholders.
    self.bidNotice_url = 'http://xinxiang.hngp.gov.cn/webfile/xinxiang/cgxx/cggg/webinfo/{}/{}/{}.htm'
    self.bidResult_url = 'http://xinxiang.hngp.gov.cn/webfile/xinxiang/cgxx/jggg/webinfo/{}/{}/{}.htm'
    self.modifyResult_url = 'http://xinxiang.hngp.gov.cn/webfile/xinxiang/cgxx/bggg/webinfo/{}/{}/{}.htm'

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    # A list is used instead of a set literal: a set iterates in an
    # arbitrary order that can change between interpreter runs, whereas
    # a list keeps the declared order.
    self.start_urls = [
        # Procurement notices: 1305 pages in total, about 1 page of new
        # entries per day, but only the first 200 pages can be crawled.
        ('招标公告', 'http://xinxiang.hngp.gov.cn/xinxiang/ggcx?appCode=H68&channelCode=0101&bz=0&pageSize=20&pageNo={}', 3),
        ('招标结果', 'http://xinxiang.hngp.gov.cn/xinxiang/ggcx?appCode=H68&channelCode=0102&bz=0&pageSize=20&pageNo={}', 3),
        ('变更公告', 'http://xinxiang.hngp.gov.cn/xinxiang/ggcx?appCode=H68&channelCode=0103&bz=0&pageSize=20&pageNo={}', 3),
    ]
def __init__(self):
    """Store the crawl configuration for the wz.guangzh.95306.cn source.

    ``get_city_dict``, ``category`` and ``regularExpression`` are resolved
    from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.regularExpression = regularExpression
    self.category = category

    # Prefix used when building article links.
    self.article_url = 'http://61.235.77.80/'

    # Regex patterns kept both individually and as a list; the single
    # pattern captures the text following "成交人:" up to the next "<".
    self.pattern01 = r'成交人:(.*?)<'
    self.pattern_list = [self.pattern01]

    # NOTE(review): Host/Referer use the bare IP while the start URLs use
    # the wz.guangzh.95306.cn hostname -- confirm this is intended.
    self.headers = {
        'Host': '61.235.77.80',
        'Referer': 'http://61.235.77.80/mainPageNoticeList.do?method=list&cur=1',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Procurement notices: 1601 pages in total, about 10 pages of new
        # entries per day.
        (
            '招标公告',
            'http://wz.guangzh.95306.cn/mainPageNoticeList.do?method=init&id=1000001&cur={}&keyword=&inforCode=&time0=&time1=',
            16,
        ),
        # Award results: 598 pages in total, about 6 pages per day.
        (
            '招标结果',
            'http://wz.guangzh.95306.cn/mainPageNoticeList.do?method=init&id=1200001&cur={}&keyword=&inforCode=&time0=&time1=',
            11,
        ),
        # Change notices: 85 pages in total, about 1 page per day.
        (
            '变更公告',
            'http://wz.guangzh.95306.cn/mainPageNoticeList.do?method=init&id=1300001&cur={}&keyword=&inforCode=&time0=&time1=',
            3,
        ),
        # Procurement publicity: 487 pages in total, about 3 pages per day.
        (
            '招标结果',
            'http://wz.guangzh.95306.cn/mainPageNoticeList.do?method=init&id=1600001&cur={}&keyword=&inforCode=&time0=&time1=',
            5,
        ),
        # Procurement publicity: 484 pages in total, about 3 pages per day.
        (
            '招标公告',
            'http://wz.guangzh.95306.cn/mainPageNoticeList.do?method=init&id=7000001&cur={}&keyword=&inforCode=&time0=&time1=',
            5,
        ),
        # Result publicity: 39 pages in total, about 3 pages per day.
        (
            '招标结果',
            'http://wz.guangzh.95306.cn/mainPageNoticeList.do?method=init&id=7200001&cur={}&keyword=&inforCode=&time0=&time1=',
            5,
        ),
    ]
def __init__(self):
    """Initialise every configuration attribute to an empty placeholder.

    All source-specific values (rules, name, address id, base URL,
    headers) are left blank here. ``get_city_dict``, ``category``,
    ``regularExpression``, ``regularExpression02`` and ``pc`` are resolved
    from the enclosing module.
    """
    # Shared regex rules, categories and helpers supplied by the module.
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.category = category
    self.city_dict = get_city_dict()
    self.pc = pc

    # Blank extraction rules; 'content_rule' is an (empty) raw string
    # because it holds a regular expression rather than an XPath.
    self.xpath_rule = {
        'list_page': '',
        'title_rule': '',
        'url_rule': '',
        'web_time_rule': '',
        'content_rule': r'',
    }

    # Blank source identity and request configuration.
    self.source_name = ''
    self.addr_id = ''
    self.baseUrl = ''
    self.headers = {}

    # A counter must start as an integer: an empty string would raise
    # TypeError on the first ``+= 1`` or numeric comparison.
    self.error_count = 0
def __init__(self):
    """Store the crawl configuration for 大庆市公共资源交易网 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression`` and
    ``regularExpression02`` are resolved from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.category = category

    # Source identity and bookkeeping.
    self.source_name = '大庆市公共资源交易网'
    self.addr_id = '413'
    self.error_count = 0

    # Request configuration.
    self.govPurchase_baseUrl = 'http://www.dqgpc.gov.cn'
    self.baseUrl = ''
    self.headers = {
        'Host': 'www.dqgpc.gov.cn',
        'Referer': 'http://www.dqgpc.gov.cn/jyxxZfcg/index_1.htm',
    }

    # Extraction rules. 'list_page' selects the listing rows, the next
    # three are XPath expressions relative to a row, and 'content_rule'
    # is a regular expression with one capture group.
    self.xpath_rule = {
        'list_page': '//div[@class="infor-con2 on"]//li',
        'title_rule': './a/@title',
        'url_rule': './a/@href',
        'web_time_rule': './span/text()',
        'content_rule': r'<body.*?>(.*?)</body>',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # Government procurement notices: 843 pages, 1 page per day.
        ('招标公告', 'http://www.dqgpc.gov.cn/jyxxZfcgCggg/index_{}.htm', 3),
        # Government procurement results: 364 pages, 1 page per day.
        ('招标结果', 'http://www.dqgpc.gov.cn/jyxxZfcgZbgg/index_{}.htm', 3),
        # Pre-award notices: 402 pages, 1 page per day.
        ('招标结果', 'http://www.dqgpc.gov.cn/jyxxZfcgYzbgg/index_{}.htm', 3),
        # Construction / water / transport tenders: 110 pages, 1 page
        # per day.
        ('招标公告', 'http://www.dqgpc.gov.cn/jyxxJsgcZbgg/index_{}.htm', 3),
        # Change notices for those tenders: 31 pages, 1 page per day.
        ('变更公告', 'http://www.dqgpc.gov.cn/jyxxJsgcBgcggg/index_{}.htm', 3),
        # Result notices for those tenders: 133 pages, 1 page per day.
        ('招标结果', 'http://www.dqgpc.gov.cn/jyxxJsgcZbgs/index_{}.htm', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 长江航道局 on the instance.

    ``get_city_dict``, ``category`` and ``regularExpression`` are resolved
    from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.regularExpression = regularExpression
    self.category = category

    # Source identity and bookkeeping; no address id is set for it.
    self.source_name = '长江航道局'
    self.addr_id = ''
    self.error_count = 0

    # Separate base URLs for the tender-notice and result-notice sections.
    self.bidNotice_baseUrl = 'http://www.cjhdj.com.cn/xxgk/wsgs/zbgg'
    self.resultNotice_baseUrl = 'http://www.cjhdj.com.cn/xxgk/wsgs/zbjggs'

    self.headers = {
        'Host': 'www.cjhdj.com.cn',
        'Referer': 'http://www.cjhdj.com.cn/xxgk/wsgs/zbgg/index_1.shtml',
        'Connection': 'keep-alive',
    }

    # Extraction rules. 'list_page' selects the listing rows, the next
    # three are XPath expressions relative to a row, and 'content_rule'
    # is a regular expression with one capture group.
    self.xpath_rule = {
        'list_page': '//div[@class="gl_list1"]//ul/li',
        'title_rule': './h3//a/text()',
        'url_rule': './h3//a/@href',
        'web_time_rule': './h3/span/text()',
        'content_rule': r'<div class="bor1 pad_t20 mar_t15">(.*?)<div class="xl_icon"',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    self.start_urls = [
        # 75 pages in total, about 1 page of new entries per day.
        ('招标公告', 'http://www.cjhdj.com.cn/xxgk/wsgs/zbgg/index_{}.shtml', 3),
        # 52 pages in total, about 1 page of new entries per day.
        ('招标结果', 'http://www.cjhdj.com.cn/xxgk/wsgs/zbjggs/index_{}.shtml', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 中国国际航空公司 on the instance.

    ``get_city_dict``, ``category`` and ``regularExpression`` are resolved
    from the enclosing module.
    """
    # Shared lookup data and regex rules supplied by the module.
    self.city_dict = get_city_dict()
    self.regularExpression = regularExpression
    self.category = category

    # Source identity and bookkeeping.
    self.source_name = '中国国际航空公司'
    self.error_count = 0

    # Request configuration.
    self.base_url = 'http://www.airchina.com.cn'
    self.headers = {
        'Connection': 'keep-alive',
        'Host': 'www.airchina.com.cn',
    }

    # Only the article body is configured here; it is a regular
    # expression with one capture group.
    self.xpath_rule = {
        'content_rule': r'<!--AutonomyContentBegin-->(.*?)<!--AutonomyContentEnd-->',
    }

    # A single (category label, listing URL, page count) entry; this URL
    # carries no page placeholder. The third element is presumably the
    # number of listing pages to crawl -- confirm against the caller.
    self.start_urls = [
        (
            '招标公告',
            'http://www.airchina.com.cn/cn/contact_us/cgpt/cgxmgg/index.shtml',
            3,
        ),
    ]
def __init__(self):
    """Store the crawl configuration for 玉林市人民政府门户网站 on the instance.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '玉林市人民政府门户网站'
    self.addr_id = '416'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://www.yulin.gov.cn'
    self.headers = {
        'Host': 'www.yulin.gov.cn',
        'Referer': 'http://www.yulin.gov.cn/menhuwangzhan/zwgk/ggzypzlygk/zfcg/zbgg',
    }

    # Extraction rules. 'list_page' selects the listing rows, the next
    # three are XPath expressions relative to a row, and 'content_rule'
    # is a regular expression with one capture group.
    self.xpath_rule = {
        'list_page': '//div[@class="zfdtxx_lb bszn"]/ul/li',
        'title_rule': './/a/@title',
        'url_rule': './/a/@href',
        'web_time_rule': './span/text()',
        'content_rule': r'</h3>(.*?)<a class="button"',
    }

    # (category label, listing URL template, page count).
    # The third element is presumably the number of listing pages to
    # crawl -- confirm against the caller.
    # Page totals noted by the original author: tender notices 2480,
    # award notices 2247, change notices 185, single-source 11,
    # cancelled tenders 111; about 1 page of new entries per day each.
    self.start_urls = [
        ('招标公告', 'http://www.yulin.gov.cn/web/ylmh/channel/channel.ptl?pageNo={}&channelCode=001083032002002&$$time$$=20190213104940', 3),
        ('招标结果', 'http://www.yulin.gov.cn/web/ylmh/channel/channel.ptl?pageNo={}&channelCode=001083032002004&$$time$$=20190213134259', 3),
        ('变更公告', 'http://www.yulin.gov.cn/web/ylmh/channel/channel.ptl?pageNo={}&channelCode=001083032002003&$$time$$=20190213134346', 3),
        ('招标公告', 'http://www.yulin.gov.cn/web/ylmh/channel/channel.ptl?pageNo={}&channelCode=001083032002005&$$time$$=20190213134541', 3),
        ('招标结果', 'http://www.yulin.gov.cn/web/ylmh/channel/channel.ptl?pageNo={}&channelCode=001083032002006&$$time$$=20190213134841', 3),
    ]
def __init__(self):
    """Store the crawl configuration for 柳州市公共资源交易服务中心网站.

    ``get_city_dict``, ``category``, ``regularExpression``,
    ``regularExpression02`` and ``pc`` are resolved from the enclosing
    module.
    """
    # Shared lookup data, regex rules and the ``pc`` helper object.
    self.city_dict = get_city_dict()
    self.category = category
    self.regularExpression = regularExpression
    self.regularExpression02 = regularExpression02
    self.pc = pc

    # Source identity and bookkeeping.
    self.source_name = '柳州市公共资源交易服务中心网站'
    self.addr_id = '416'
    self.error_count = 0

    # Request configuration.
    self.baseUrl = 'http://www.gxlzzb.com'
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        'Host': 'www.gxlzzb.com',
        'Origin': 'http://www.gxlzzb.com',
    }
    self.gov_bidNotice_url = 'http://www.gxlzzb.com/gxlzzbw//showinfo/jyxxmore.aspx?catgorynum1=&catgorynum2=&xiaqu=&type=1'

    # Extraction rules. The first three are relative XPath expressions;
    # 'content_rule' is a regular expression with one capture group.
    self.xpath_rule = {
        'title_rule': './td/div/a/@title',
        'url_rule': './td/div/a/@href',
        'web_time_rule': './/span[@class="wb-data-date"]/text()',
        'content_rule': r'<td id="TDContent".*?>(.*?)<td id="authorTd',
    }

    # Form-state values; presumably the ASP.NET postback fields sent when
    # paging the .aspx listing -- confirm against the request code.
    self.VIEWSTATE = ''
    self.VIEWSTATEGENERATOR = 'D38D4441'
    self.EVENTTARGET = 'JyxxSearch1%24Pager'

    # Counter starting at 1; presumably the current page -- confirm.
    self.count = 1
def __init__(self, items, response, content_rule, source_name):
    """Keep the given arguments on the instance and load the city data.

    Args:
        items: Stored unchanged as ``self.items``.
        response: Stored unchanged as ``self.response``.
        content_rule: Stored as ``self.content_url``.
        source_name: Stored unchanged as ``self.source_name``.
    """
    self.source_name = source_name
    self.response = response
    self.items = items

    # NOTE(review): the ``content_rule`` argument is stored under the
    # attribute name ``content_url`` -- confirm the naming is intended.
    self.content_url = content_rule

    # City lookup data from the module-level ``get_city_dict`` helper.
    self.city_data = get_city_dict()