def get_detail_html(url, timeout=30):
    """Download ``url`` and return the response body as text.

    The request is attempted up to five times; every attempt calls
    ``get_proxy()`` again for its proxy settings. An attempt counts as failed
    when it raises or when the returned body is empty.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on a single attempt. ``requests`` does not
            time out on its own, so without this a stalled proxy could block
            the crawler indefinitely.

    Returns:
        The non-empty response text, or ``None`` when all attempts failed.
    """
    for _ in range(5):
        try:
            response = requests.get(
                url, headers=headers, proxies=get_proxy(), timeout=timeout
            )
            res = response.text
            print("当前运行url为{}".format(url))
            if res:
                return res
        except Exception as e:
            # Best-effort scraping: report the failure and move on to the
            # next attempt instead of aborting the whole crawl.
            print("当前运行url{}出错{}".format(url, repr(e)))
    return None
def get_all_brand(timeout=30):
    """Return the de-duplicated brand links found on the service index page.

    The page is requested up to three times and the first non-empty response
    is parsed. Previously the loop never stopped after a success (so the page
    was fetched three times) and ``res`` was unbound when every attempt
    failed.

    NOTE(review): this file contains a second ``get_all_brand`` definition
    further down; the later definition replaces this one when the module is
    loaded -- confirm which one is intended to be live.

    Args:
        timeout: Seconds to wait on a single request attempt.

    Returns:
        A list of unique ``href`` values (order not guaranteed because the
        values pass through a ``set``), or an empty list when the page could
        not be fetched or parsed.
    """
    # Overall crawl plan: get every brand, request each brand url to collect
    # the region urls, request each region url to reach the list page, read
    # the item count, work out how many pages there are, then paginate.
    url = "https://www.chinatruck.org/service/b3382_area_0"
    res = None
    for _ in range(3):
        try:
            res = requests.get(
                url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
        except Exception as e:
            print("当前运行url{}出错{}=1==".format(url, repr(e)))
            continue
        if res:
            # Stop at the first usable response instead of re-fetching.
            break
    if not res:
        return []

    html = etree.HTML(res)
    if html is None:
        # Presumably the parser yields None for markup it cannot build a
        # tree from; guard so the xpath call below cannot fail on None.
        return []
    return list(set(html.xpath("//li/div[@class='brand-jxs-tab']/span/a/@href")))
def get_all_brand(timeout=30):
    """Return the de-duplicated brand links found on the dealer index page.

    The page is requested up to three times and the first non-empty response
    is parsed. Previously failures were swallowed by a bare ``except: pass``,
    the loop never stopped after a success, and ``res`` was unbound when
    every attempt failed.

    NOTE(review): an earlier ``get_all_brand`` definition (service index URL)
    appears in this file and is replaced by this one when the module is
    loaded -- confirm that is intended.

    Args:
        timeout: Seconds to wait on a single request attempt.

    Returns:
        A list of unique ``href`` values (order not guaranteed because the
        values pass through a ``set``), or an empty list when the page could
        not be fetched or parsed.
    """
    # Overall crawl plan: get every brand, request each brand url to collect
    # the region urls, request each region url to reach the list page, read
    # the item count, work out how many pages there are, then paginate.
    url = "https://www.chinatruck.org/dealer/b490_area_0"
    res = None
    for _ in range(3):
        try:
            res = requests.get(
                url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
        except Exception as e:
            # Report instead of silently swallowing; ``Exception`` (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            print("当前运行url{}出错{}".format(url, repr(e)))
            continue
        if res:
            # Stop at the first usable response instead of re-fetching.
            break
    if not res:
        return []

    html = etree.HTML(res)
    if html is None:
        # Presumably the parser yields None for markup it cannot build a
        # tree from; guard so the xpath call below cannot fail on None.
        return []
    return list(set(html.xpath("//li/div[@class='brand-jxs-tab']/span/a/@href")))
def get_add_url_by_brand(brand_url, timeout=30):
    """Map each region name on a brand page to that region's link.

    The brand page is requested up to five times and the first non-empty
    response is parsed. Every ``<dd>`` under ``dl.tab-fw`` contributes one
    entry: the first text node of its ``<a>`` as key, the ``href`` as value.

    NOTE(review): this file contains a second ``get_add_url_by_brand``
    definition further down (it targets ``tab-jxs``); the later definition
    replaces this one when the module is loaded -- confirm which is intended.

    Args:
        brand_url: Address of the brand page to scan.
        timeout: Seconds to wait on a single request attempt.

    Returns:
        A dict of ``{region_text: href}``. Empty when the page could not be
        fetched or parsed (previously this path raised because ``res`` was
        never assigned).
    """
    add_url_dict = {}
    res = None
    for _ in range(5):
        try:
            res = requests.get(
                brand_url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
        except Exception as e:
            print("当前运行url{}出错{}===".format(brand_url, repr(e)))
            continue
        if res:
            break
    if not res:
        return add_url_dict

    html = etree.HTML(res)
    if html is None:
        # Presumably the parser yields None for markup it cannot build a
        # tree from; guard so the xpath call below cannot fail on None.
        return add_url_dict

    for addr_url in html.xpath("//li/dl[@class='tab-fw']/dd"):
        names = addr_url.xpath("./a/text()")
        links = addr_url.xpath("./a/@href")
        if not names or not links:
            # A <dd> without a usable anchor used to raise IndexError and
            # lose the whole result; skip just that entry instead.
            continue
        add_url_dict[names[0]] = links[0]
    return add_url_dict
def get_add_url_by_brand(brand_url, timeout=30):
    """Map each region name on a brand page to that region's link.

    The brand page is requested up to five times and the first non-empty
    response is parsed. Every ``<dd>`` under ``dl.tab-jxs`` contributes one
    entry: the first text node of its ``<a>`` as key, the ``href`` as value.

    The function stays best-effort -- parse problems are reported and
    whatever was collected so far is returned -- but it no longer relies on
    ``return`` inside ``finally``, which also discarded KeyboardInterrupt and
    SystemExit.

    NOTE(review): an earlier ``get_add_url_by_brand`` definition (targeting
    ``tab-fw``) appears in this file and is replaced by this one when the
    module is loaded -- confirm that is intended.

    Args:
        brand_url: Address of the brand page to scan.
        timeout: Seconds to wait on a single request attempt.

    Returns:
        A dict of ``{region_text: href}``; empty when the page could not be
        fetched or parsed.
    """
    add_url_dict = {}
    res = None
    for _ in range(5):
        try:
            res = requests.get(
                brand_url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
        except Exception as e:
            # Report instead of silently swallowing the failure.
            print("当前运行url{}出错{}".format(brand_url, repr(e)))
            continue
        if res:
            break
    if not res:
        return add_url_dict

    try:
        html = etree.HTML(res)
        # Presumably the parser yields None for markup it cannot build a
        # tree from; treat that as "no entries".
        dd_nodes = [] if html is None else html.xpath("//li/dl[@class='tab-jxs']/dd")
        for addr_url in dd_nodes:
            names = addr_url.xpath("./a/text()")
            links = addr_url.xpath("./a/@href")
            if not names or not links:
                # Skip a <dd> without a usable anchor rather than stopping
                # at the first malformed entry.
                continue
            add_url_dict[names[0]] = links[0]
    except Exception as e:
        # Keep the original never-raise contract for ordinary errors while
        # making the failure visible.
        print("当前运行url{}出错{}".format(brand_url, repr(e)))
    return add_url_dict