示例#1
0
def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    The request uses a 30-second timeout. A 4xx/5xx status is turned into an
    error by ``raise_for_status()``. The body is decoded using the encoding
    guessed from the content (``apparent_encoding``) instead of the one
    declared in the response headers.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string if the download fails for
        any reason.
    """
    # ``except Exception`` instead of a bare ``except`` so KeyboardInterrupt
    # and SystemExit still propagate; every ordinary failure stays
    # best-effort and yields "".
    try:
        # Named ``response`` rather than ``re`` to avoid shadowing the
        # standard-library ``re`` module name.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text
    except Exception:
        return ""
def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET, reporting progress on stdout.

    A 4xx/5xx status is turned into an error by ``raise_for_status()``. The
    body is decoded using the encoding guessed from the content
    (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text on success, or ``None`` after printing a
        failure message if the download fails for any reason.
    """
    # ``except Exception`` instead of a bare ``except`` so KeyboardInterrupt
    # and SystemExit still propagate; ordinary failures are still reported
    # and swallowed.
    try:
        # NOTE(review): requests timeouts are expressed in seconds, so 5000
        # is roughly 83 minutes. This looks like it was meant to be
        # milliseconds -- confirm the intended value before changing it.
        response = requests.get(url, timeout=5000)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        print("获取url完成")
        return response.text
    except Exception:
        print("获取Url失败")
        return None
示例#3
0
def getHtmlByRequets(url):
    """Fetch ``url`` with an HTTP GET while sending a desktop Chrome User-Agent.

    The request uses a 30-second timeout. A 4xx/5xx status is turned into an
    error by ``raise_for_status()``. The body is decoded using the encoding
    guessed from the content (``apparent_encoding``).

    Args:
        url: Address of the page to download; it is requested as given.

    Returns:
        The decoded page text on success, or ``None`` after printing a
        failure message if the request fails for any reason.
    """
    request_headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    # ``except Exception`` instead of a bare ``except`` so KeyboardInterrupt
    # and SystemExit still propagate; ordinary failures are still reported
    # and swallowed.
    try:
        # Named ``response`` rather than ``re`` to avoid shadowing the
        # standard-library ``re`` module name.
        response = requests.request(
            'get', url, timeout=30, headers=request_headers)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text
    except Exception:
        print('访问页面有误')
        return None
示例#4
0
def get_html(url, *, timeout=30):
    """Fetch ``url`` with an HTTP GET using a fixed set of browser-like headers.

    A 4xx/5xx status is turned into an error by ``raise_for_status()``. The
    body is decoded using the encoding guessed from the content
    (``apparent_encoding``).

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            server, so a finite default is applied.

    Returns:
        The decoded page text on success, or ``None`` if the request fails;
        in that case the error is printed and the traceback is logged.
    """
    # NOTE(review): the Cookie below appears to embed a session token, user
    # id and phone number -- consider loading it from configuration instead
    # of keeping it in source.
    # NOTE(review): 'br' is advertised in Accept-Encoding; presumably the
    # body is only decoded correctly when a Brotli decoder is installed --
    # confirm.
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding':
        'gzip, deflate, br',
        'Accept-Language':
        'zh-CN,zh;q=0.9',
        'Cache-Control':
        'max-age=0',
        'Connection':
        'keep-alive',
        'Cookie':
        'TYCID=b0d81ab0628011ea9068576c9a359aa1; undefined=b0d81ab0628011ea9068576c9a359aa1; ssuid=7039631112; _ga=GA1.2.267575536.1583811546; tyc-user-phone=%255B%252215108389554%2522%255D; CLOUDID=83c9c02b-efd6-4491-ab64-d2d65537b3f3; parent_qimo_sid_f0615f20-d9d7-11e9-96c6-833900356dc6=a24fad60-6347-11ea-a49b-11a674d348b6; jsid=SEM-BAIDU-PZ2003-VI-000001; bad_idf0615f20-d9d7-11e9-96c6-833900356dc6=b473dcd1-6817-11ea-86ec-5d3b2603a5c7; aliyungf_tc=AQAAAJVvqAzbQwsAqOpZ2rMqOX14e7uj; csrfToken=Y8y9xvyOXOF9lDgUIiR_O6O-; Hm_lvt_dfd2445765658d46619739a80fb5f6b2=1585536438; X-TOKEN=eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ7XCJjcmVhdGVUaW1lXCI6MTU4NTUzNjk2MzQ5MCxcImV4cGlyZXNcIjo2MDQ4MDAsXCJpZFwiOjEwNDQ4NDYsXCJtb2JpbGVcIjpcIjE3MTM1MTkyODU1XCIsXCJvcmRlcklkXCI6MjQ2MjIsXCJvcmdJZFwiOjI1NTY3LFwicHJvSWRcIjozMzA0LFwidXNlcklkXCI6ODY0NzUxLFwidXNlcm5hbWVcIjpcIjE3MTM1MTkyODU1XCIsXCJ2cG5cIjpmYWxzZX0iLCJqdGkiOiIxMDQ0ODQ2IiwiaXNzIjoic3RkLnRpYW55YW5jaGEuY29tIiwibmJmIjoxNTg1NTM2OTYzLCJpYXQiOjE1ODU1MzY5NjMsImV4cCI6MTU4NjE0MTc2M30.hSkmiV5bDHpWYhLU52qQz3-KA_I55x7Wj5TyACMdH9M; UID=864751; UNAME=17135192855; UORG=25567; qimo_seosource_f0615f20-d9d7-11e9-96c6-833900356dc6=%E7%AB%99%E5%86%85; qimo_seokeywords_f0615f20-d9d7-11e9-96c6-833900356dc6=; href=https%3A%2F%2Fstd.tianyancha.com%2Fhome; accessId=f0615f20-d9d7-11e9-96c6-833900356dc6; pageViewNum=1; nice_idf0615f20-d9d7-11e9-96c6-833900356dc6=001c08f1-7232-11ea-a6b8-8d8d788bfa0f; Hm_lpvt_dfd2445765658d46619739a80fb5f6b2=1585536975',
        'Referer':
        'https://std.tianyancha.com/searchx?pn=1&ps=15&pn=1&ps=15&combus=1&ctypeor=1&hasm=2&ismat=1&ht=2&ipam=1&hm=2&hrl=2&hr=2&htm=2&hw=2&hp=2&hd=2&isht=2&islist=2&hls=2&hca=2&het=2&hrr=2&hja=2&hab=2&hph=2&hil=2&heq=2&htc=2&hep=2&hevp=2&hjs=2&hcl=2&hip=2&hzp=2&htcl=2&hl=2&hie=2&hws=2&hc=2&mg=2&c1or=&c2or=&regcg=1000&regcl=90000000&sortkey=establish_time&sortval=desc&regdg=20200101&regdl=20200316',
        'DNT':
        '1',
        'Host':
        'std.tianyancha.com',
        'Sec-Fetch-Dest':
        'document',
        'Sec-Fetch-Mode':
        'navigate',
        'Sec-Fetch-Site':
        'same-origin',
        'Sec-Fetch-User':
        '******',
        'Upgrade-Insecure-Requests':
        '1',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    try:
        # Named ``response`` rather than ``re`` to avoid shadowing the
        # standard-library ``re`` module name.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text
    except Exception as e:
        print(
            '错误信息',
            e,
        )
        logging.error(traceback.format_exc())
        # Failure is signalled to the caller by an explicit None.
        return None