def wrapper(*args, **kw):
    """Fetch and parse the page at ``url``, then hand the document to ``fn``.

    ``url`` and ``fn`` are not defined inside this function; they come from a
    surrounding scope that is not shown here.

    The page is requested through ``get(url, cache=True, **kw)``, parsed with
    ``lhtml.document_fromstring`` and passed to ``fn`` as the first positional
    argument, followed by ``*args`` and ``**kw``. Note that ``**kw`` is
    forwarded both to ``get`` and to ``fn``.

    If any step of the first attempt raises ``Exception``, the cache entry for
    ``url`` is overwritten with ``None`` and the same sequence runs exactly
    once more. An exception from the second attempt propagates to the caller.
    """

    def attempt():
        # One full fetch -> parse -> call cycle.
        markup = get(url, cache=True, **kw)
        parsed = lhtml.document_fromstring(markup)
        return fn(parsed, *args, **kw)

    try:
        return attempt()
    except Exception:
        # Overwrite whatever is cached for this URL before trying again.
        write_cache(url, None)
        return attempt()
def get(url, cache=False, headers=None, verify=True, timeout=REQUEST_TIMEOUT,
        **kwargs):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address to fetch. Anything that does not start with ``'http'``
            is ignored and ``None`` is returned.
        cache: When true, ``read_cache(url)`` is consulted first and a freshly
            fetched response is stored with ``write_cache(url, response)``.
        headers: Optional mapping of extra request headers. It is copied, so
            the caller's mapping is never modified. Entries in
            ``default_headers`` override same-named entries supplied here.
        verify: Forwarded to ``requests.get``.
        timeout: Forwarded to ``requests.get``.
        **kwargs: Forwarded to ``requests.get``.

    Returns:
        ``response.text``, or ``None`` for a non-http ``url``. For a
        ``text/html`` response whose ``Content-Type`` header names no charset,
        the encoding is first taken from the first ``<meta>`` tag in
        ``<head>`` that declares one (``http-equiv="content-type"`` or
        ``charset="..."``).

    Raises:
        Whatever ``requests.get`` or ``response.raise_for_status()`` raise.
    """
    # NOTE(review): this file defines ``get`` twice in a row with the same
    # signature; the later definition is the one bound at import time.
    if not url.startswith('http'):
        return None

    response = read_cache(url) if cache else None
    if not response:
        # Work on a copy: updating ``headers`` in place would leak the
        # defaults into the caller's dict (or into a shared default argument).
        request_headers = dict(headers) if headers else {}
        request_headers.update(default_headers)
        response = requests.get(url, headers=request_headers, verify=verify,
                                timeout=timeout, **kwargs)
        response.raise_for_status()
        if cache:
            write_cache(url, response)

    # Fix charset if necessary: only for HTML served without a charset.
    if 'Content-Type' not in response.headers:
        return response.text
    content_type = response.headers['Content-Type']
    if 'text/html' not in content_type or 'charset' in content_type:
        return response.text

    text = response.text
    if not text.strip():
        # lxml rejects an empty document, and there is nothing to sniff.
        return text

    head = lhtml.document_fromstring(text).find("head")
    if head is None:
        # The parsed tree has no <head>, hence no <meta> tags to inspect.
        return text

    for meta in head.findall("meta"):
        charset = None
        http_equiv = meta.get("http-equiv")
        if http_equiv is not None and http_equiv.lower() == "content-type":
            # e.g. content="text/html; charset=utf-8"; the attribute may be
            # missing, and a part may lack "=" altogether.
            for part in (meta.get("content") or "").split(";"):
                key, sep, value = part.partition("=")
                if sep and key.strip().lower() == "charset":
                    charset = value.strip().strip("\"'")
                    break
        if not charset:
            # HTML5 form: <meta charset="utf-8">
            charset = meta.get("charset")
        if charset:
            response.encoding = charset
            return response.text

    return response.text
def get(url, cache=False, headers=None, verify=True, timeout=REQUEST_TIMEOUT,
        **kwargs):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address to fetch. Anything that does not start with ``'http'``
            is ignored and ``None`` is returned.
        cache: When true, ``read_cache(url)`` is consulted first and a freshly
            fetched response is stored with ``write_cache(url, response)``.
        headers: Optional mapping of extra request headers. It is copied, so
            the caller's mapping is never modified. Entries in
            ``default_headers`` override same-named entries supplied here.
        verify: Forwarded to ``requests.get``.
        timeout: Forwarded to ``requests.get``.
        **kwargs: Forwarded to ``requests.get``.

    Returns:
        ``response.text``, or ``None`` for a non-http ``url``. For a
        ``text/html`` response whose ``Content-Type`` header names no charset,
        the encoding is first taken from the first ``<meta>`` tag in
        ``<head>`` that declares one (``http-equiv="content-type"`` or
        ``charset="..."``).

    Raises:
        Whatever ``requests.get`` or ``response.raise_for_status()`` raise.
    """
    # NOTE(review): an earlier definition of ``get`` with the same signature
    # precedes this one in the file; this later definition is the one bound
    # at import time.
    if not url.startswith('http'):
        return None

    response = read_cache(url) if cache else None
    if not response:
        # Work on a copy: updating ``headers`` in place would leak the
        # defaults into the caller's dict (or into a shared default argument).
        request_headers = dict(headers) if headers else {}
        request_headers.update(default_headers)
        response = requests.get(url, headers=request_headers, verify=verify,
                                timeout=timeout, **kwargs)
        response.raise_for_status()
        if cache:
            write_cache(url, response)

    # Fix charset if necessary: only for HTML served without a charset.
    if 'Content-Type' not in response.headers:
        return response.text
    content_type = response.headers['Content-Type']
    if 'text/html' not in content_type or 'charset' in content_type:
        return response.text

    text = response.text
    if not text.strip():
        # lxml rejects an empty document, and there is nothing to sniff.
        return text

    head = lhtml.document_fromstring(text).find("head")
    if head is None:
        # The parsed tree has no <head>, hence no <meta> tags to inspect.
        return text

    for meta in head.findall("meta"):
        charset = None
        http_equiv = meta.get("http-equiv")
        if http_equiv is not None and http_equiv.lower() == "content-type":
            # e.g. content="text/html; charset=utf-8"; the attribute may be
            # missing, and a part may lack "=" altogether.
            for part in (meta.get("content") or "").split(";"):
                key, sep, value = part.partition("=")
                if sep and key.strip().lower() == "charset":
                    charset = value.strip().strip("\"'")
                    break
        if not charset:
            # HTML5 form: <meta charset="utf-8">
            charset = meta.get("charset")
        if charset:
            response.encoding = charset
            return response.text

    return response.text