def get_content(self, url):
    """Download ``url`` and run the body through an encoding-specific converter.

    The request is sent with fixed browser-like headers and a 3 second
    timeout. The raw response body is inspected with ``chardet.detect`` and
    the detection result is printed to stdout.

    Args:
        url: Address of the page to fetch.

    Returns:
        The result of ``char_change_utf8`` when the detected encoding is
        exactly ``'utf-8'``, the result of ``char_change_gbk`` when it is
        exactly ``'gbk'``, and otherwise the raw response body unchanged.
    """
    request_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;",
        "Accept-Encoding": "gzip",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Referer": "http://ent.ifeng.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
    }
    response = requests.get(url, timeout=3, headers=request_headers)
    raw_body = response.content

    detection = chardet.detect(raw_body)
    # Parenthesised form prints the same text under the Python 2 print statement.
    print(detection)

    detected_encoding = detection['encoding']
    # NOTE(review): the comparison is exact and case-sensitive; any other
    # detected name (or None) falls through to the raw body -- confirm intended.
    if detected_encoding == 'utf-8':
        return char_change_utf8(raw_body)
    if detected_encoding == 'gbk':
        return char_change_gbk(raw_body)
    return raw_body
def get_content(self, url):
    """Download ``url`` and return its text after ``char_change_gbk`` conversion.

    The page is requested with a 3 second timeout and no custom headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        Whatever ``char_change_gbk`` returns for the response's decoded text.
    """
    response = requests.get(url, timeout=3)
    # ``.text`` is the body already decoded by requests, not the raw bytes.
    page_text = response.text
    return char_change_gbk(page_text)