def get_list(string):
    """Search the site for ``string`` and collect the detail-page URLs.

    The index page is fetched first to extract the query token embedded in
    the search button's ``onclick`` handler. The token is formatted into
    ``config.list_url`` and the search is POSTed with a User-Agent picked at
    random from ``user-agent.txt``.

    Args:
        string: Search keyword, formatted into ``config.list_parmas``.

    Returns:
        An ``(info, flag)`` tuple. ``info`` maps the position of every
        ``<li>`` entry of the result page to ``config.host`` + its first
        link href. ``flag`` is:

        * ``1`` when at least one entry was collected,
        * ``0`` when the result page contained no ``<li>`` entry,
        * ``100000003`` when the page text reports no results, too
          frequent access or abnormal access,
        * ``100000004`` on a non-200 status, a response not detected as
          utf-8, or any exception (which is logged, not raised).
    """
    info = {}
    flag = 0
    try:
        content, status_code = Send_Request().send_request(
            config.index_url, config.headers_index)
        if status_code != 200:
            # The original used 10000004 here (one zero short of the code
            # used by every other failure path); treated as a typo.
            return info, 100000004

        index_page = etree.HTML(
            content, parser=etree.HTMLParser(encoding='utf-8'))
        onclick = index_page.xpath(
            '//span[@class = "shouButton"]/@onclick')[0]
        # A missing button or token raises IndexError, handled below.
        match_id = re.findall(r".*QueryIndex\('','(.*?)'\).*", onclick)[0]
        url = config.list_url.format(match_id)
        params = config.list_parmas.format(string)

        # Pick a random User-Agent from lines 1-1000 of user-agent.txt.
        # NOTE(review): this mutates the shared config.headers dict in
        # place, so later requests built from it inherit the value.
        headers = config.headers
        user_agent = linecache.getline(
            r'user-agent.txt', random.randrange(1, 1001)).strip()
        # linecache.getline returns '' when the file or line is missing;
        # keep the configured User-Agent rather than sending an empty one.
        if user_agent:
            headers["User-Agent"] = user_agent

        response = requests.post(url, params, headers=headers)
        encoding = chardet.detect(response.content)["encoding"]
        if response.status_code != 200 or encoding != 'utf-8':
            return info, 100000004

        # Decode before matching: a unicode pattern never matches the
        # UTF-8 bytes of these messages (and raises TypeError on Python 3).
        text = response.content.decode('utf-8', 'replace')
        if re.search(u"无查询结果|访问频繁|访问异常", text):
            return info, 100000003

        result_page = etree.HTML(
            response.content, parser=etree.HTMLParser(encoding='utf-8'))
        for position, entry in enumerate(result_page.xpath("//li")):
            info[position] = config.host + entry.xpath(".//a/@href")[0]
        if info:
            flag = 1
    except Exception as e:
        # Best-effort crawler step: report the failure through the flag.
        logging.error("search error:%s", e)
        flag = 100000004
    return info, flag
def get_deatail_info(self, detail_url, info):
    """Fetch a detail page and copy its labelled fields into ``info``.

    Args:
        detail_url: URL of the detail page to request.
        info: Dict updated in place; for every known page label the value
            returned by ``deal_html_code.get_match_info`` is stored under
            the corresponding key (``items``, ``rule_no``, ...).

    Returns:
        None. When the response status is not 200 a message is logged and
        ``info`` is left untouched.
    """
    # Page label -> key under which the extracted value is stored.
    field_map = {
        u"执行事项": "items",
        u"裁定书文号": "rule_no",
        u"证照种类": "cert_cate",
        u"证照号码": "cert_code",
        u"冻结期限自": "start_date",
        u"冻结期限至": "end_date",
        u"冻结期限": "period",
        u"公示日期": "pub_date",
    }
    headers = config.headers
    # NOTE(review): get_list calls ``send_request`` (singular); confirm
    # that Send_Request really exposes ``send_requests`` as well.
    result, status_code = Send_Request().send_requests(detail_url, headers)
    if status_code != 200:
        logging.info("获取司法协助详情信息失败!")
        return

    # Parse the response the same way get_list does. The original called
    # ``result.xpath(result, parser=...)``, which is not a valid parse call.
    # Assumes send_requests returns raw HTML - TODO confirm.
    data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
    # .items() is required: iterating the dict itself yields only keys and
    # unpacking a label into two names raises ValueError.
    for label, field in field_map.items():
        info[field] = deal_html_code.get_match_info(label, data)