Exemplo n.º 1
0
 def get_content(self, url):
     """Fetch ``url`` and return its body after an encoding-specific conversion.

     The page is requested with browser-like headers and a 3-second timeout.
     ``chardet`` guesses the encoding of the raw response bytes and the guess
     is printed. When the guess is UTF-8 the bytes are passed through
     ``char_change_utf8``; when it is GBK they are passed through
     ``char_change_gbk``. For any other guess (or no guess at all) the raw
     bytes are returned unchanged.

     NOTE(review): the ``char_change_*`` helpers are defined outside this
     block, so what they return is not visible here -- confirm with their
     definitions.

     :param url: address of the page to download.
     :return: the helper's result for UTF-8/GBK pages, otherwise the raw
         response bytes.
     """
     headers = {"Accept": "text/html,application/xhtml+xml,application/xml;",
                "Accept-Encoding": "gzip",
                "Accept-Language": "zh-CN,zh;q=0.8",
                "Referer": "http://ent.ifeng.com/",
                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                              "(KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
                }
     raw = requests.get(url, timeout=3, headers=headers).content
     detected = chardet.detect(raw)
     # Parenthesised form prints the same text on Python 2 and also parses
     # on Python 3, where the bare ``print x`` statement is a syntax error.
     print(detected)
     # Encoding names are case-insensitive, and the detector may report no
     # encoding at all, so normalise before comparing.
     encoding = (detected['encoding'] or '').lower()
     if encoding == 'utf-8':
         return char_change_utf8(raw)
     if encoding == 'gbk':
         return char_change_gbk(raw)
     return raw
Exemplo n.º 2
0
 def get_content(self, url):
     """Download ``url`` and return its text run through ``char_change_gbk``.

     The request uses a 3-second timeout and no custom headers. The decoded
     response text (``Response.text``) is handed to ``char_change_gbk``,
     whose result is returned as-is.

     NOTE(review): ``char_change_gbk`` is defined outside this block; its
     exact conversion is not visible here -- confirm with its definition.

     :param url: address of the page to download.
     :return: whatever ``char_change_gbk`` returns for the response text.
     """
     response = requests.get(url, timeout=3)
     page_text = response.text
     return char_change_gbk(page_text)