class WebRequest(object):
    """Thin wrapper around ``requests`` that retries failed calls.

    The most recent response is stored on ``self.response`` and exposed
    through the ``tree`` and ``text`` convenience properties.

    NOTE(review): a second class named ``WebRequest`` appears later in this
    file; if both live in the same module the later definition wins --
    confirm which one is intended.
    """

    name = 'web_request'

    def __init__(self, *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not used.
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def header(self):
        """Return a fresh dict of the default headers used by ``post_data_json``."""
        return {'Connection': 'close', 'Content-Type': 'application/json'}

    def get(self, url, header=None, retry_time=6, retry_interval=6,
            timeout=10, *args, **kwargs):
        """Send a GET request, retrying whenever the call raises.

        Unlike ``post_data_json``, no default headers are merged in here:
        ``header`` is handed to ``requests.get`` exactly as given.

        :param url: target url
        :param header: headers passed straight through to ``requests.get``
        :param retry_time: number of attempts before giving up (a value
            below 1 still results in one attempt)
        :param retry_interval: seconds to sleep between two attempts
        :param timeout: network timeout forwarded to ``requests.get``
        :param args: extra positional arguments for ``requests.get``
        :param kwargs: extra keyword arguments for ``requests.get``
        :return: this ``WebRequest``; ``self.response`` holds the result, or
            a newly constructed ``Response`` when every attempt failed
        """
        while True:
            try:
                self.response = requests.get(url, headers=header, timeout=timeout, *args, **kwargs)
            except Exception as e:
                self.log.error("请求URL地址: %s 错误是: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Out of attempts. Reset the stored response so that a
                    # reused instance never serves the result of an earlier
                    # request as if it belonged to this one. No success
                    # status is faked on the placeholder.
                    self.response = Response()
                    return self
                self.log.info("重新连接 %s 秒后" % retry_interval)
                time.sleep(retry_interval)
            else:
                return self

    def post_data_json(self, url, header=None, retry_time=3, retry_interval=5,
                       timeout=8, *args, **kwargs):
        """Send a POST request with JSON default headers, retrying on error.

        The defaults from ``self.header`` are used and overridden by any
        entries of ``header`` when it is a non-empty dict.

        :param url: target url
        :param header: optional dict of headers merged over the defaults
        :param retry_time: number of attempts before giving up (a value
            below 1 still results in one attempt)
        :param retry_interval: seconds to sleep between two attempts
        :param timeout: network timeout forwarded to ``requests.post``
        :param args: extra positional arguments for ``requests.post``
        :param kwargs: extra keyword arguments for ``requests.post``
            (the payload, e.g. ``data=`` or ``json=``, travels here)
        :return: this ``WebRequest``; ``self.response`` holds the result, or
            a newly constructed ``Response`` when every attempt failed
        """
        # ``self.header`` builds a new dict on every access, so updating it
        # in place cannot leak caller headers into later requests.
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.post(url, headers=headers, timeout=timeout, *args, **kwargs)
            except Exception as e:
                # Reported through the instance logger, the same channel
                # ``get`` uses, instead of printing to stdout.
                self.log.error("请求: %s 错误: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Out of attempts: drop any response left over from an
                    # earlier request rather than returning stale data.
                    self.response = Response()
                    return self
                self.log.info("重新链接 %s 秒后" % retry_interval)
                time.sleep(retry_interval)
            else:
                return self

    @property
    def tree(self):
        """Return ``etree.HTML`` applied to the content of the last response."""
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """Return the ``text`` attribute of the last response."""
        return self.response.text
class WebRequest(object):
    """Thin wrapper around ``requests.get`` with retries and browser-like headers.

    The most recent response is stored on ``self.response`` and exposed
    through the ``tree`` and ``text`` convenience properties.

    NOTE(review): another class named ``WebRequest`` appears earlier in this
    file; if both live in the same module this definition replaces it --
    confirm which one is intended.
    """

    name = "web_request"

    # Pool of User-Agent strings; ``user_agent`` picks one at random.
    _USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
        'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    )

    def __init__(self, *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not used.
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def user_agent(self):
        """Return one User-Agent string chosen at random from the pool."""
        return random.choice(self._USER_AGENTS)

    @property
    def header(self):
        """Return a fresh dict of default request headers.

        The ``User-Agent`` entry is re-drawn at random on every access.
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8',
        }

    def get(self, url, header=None, retry_time=3, retry_interval=5,
            timeout=5, *args, **kwargs):
        """Send a GET request, retrying whenever the call raises.

        :param url: target url
        :param header: optional dict of headers merged over the defaults
            from ``self.header``
        :param retry_time: number of attempts before giving up (a value
            below 1 still results in one attempt)
        :param retry_interval: seconds to sleep between two attempts
        :param timeout: network timeout forwarded to ``requests.get``
        :param args: extra positional arguments for ``requests.get``
        :param kwargs: extra keyword arguments for ``requests.get``
        :return: this ``WebRequest``; ``self.response`` holds the result, or
            a newly constructed ``Response`` when every attempt failed
        """
        # ``self.header`` builds a new dict on every access, so updating it
        # in place cannot leak caller headers into later requests.
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.get(url, headers=headers, timeout=timeout, *args, **kwargs)
            except Exception as e:
                self.log.error("requests: %s error: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Out of attempts. Reset the stored response so that a
                    # reused instance never serves the result of an earlier
                    # request as if it belonged to this one. No success
                    # status is faked on the placeholder.
                    self.response = Response()
                    return self
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)
            else:
                return self

    @property
    def tree(self):
        """Return ``etree.HTML`` applied to the content of the last response."""
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """Return the ``text`` attribute of the last response."""
        return self.response.text