def _verify_get(self, url, times=0, headers=default_headers, refresh_ip=False,
                timeout=download_timeout):
    """Issue a GET through ``self._http_client`` and return it only on HTTP 200.

    :param url: address to request.
    :param times: accepted for interface compatibility; not used in this body.
    :param headers: base request headers; copied, never modified in place.
    :param refresh_ip: accepted for interface compatibility; not used in this body.
    :param timeout: forwarded to the client's ``get`` call.
    :return: the response object when its status code is 200.
    :raises Error302: status 302 (the ``Location`` header is logged first).
    :raises Error403: status 403.
    :raises Error404: status 404.
    :raises Error502: status 502.
    :raises ErrorStatusCode: any other status; constructed with the status code.
    """
    # Work on a copy: ``headers`` defaults to a shared object, and updating it
    # in place would leak this instance's User-Agent into every later call
    # that relies on the same default.
    request_headers = headers.copy()
    request_headers.update({'User-Agent': self._user_agent})

    response = self._http_client.get(url, headers=request_headers, timeout=timeout)
    status = response.status_code

    if status == 200:
        logging.debug(response.headers)
    elif status == 302:
        location = response.headers['Location']
        logging.debug("location: %s", location)
        raise Error302()
    elif status == 403:
        raise Error403()
    elif status == 404:
        raise Error404()
    elif status == 502:
        raise Error502()
    else:
        raise ErrorStatusCode(status)
    return response
def _verify_get(self, url):
    """GET ``url`` and recover from 302/404 answers via verification or login.

    Flow, as implemented below:

    * 200 -> the response is returned.
    * 302 -> the ``Location`` header is joined onto
      ``http://qiye.qianzhan.com/`` and handed to ``self.do_verify``.  If that
      succeeds the whole method is retried; otherwise ``self.login()`` is
      tried and, on success, the URL is fetched once more.  If both fail,
      ``Error302`` is raised.
    * 403 -> ``Error403`` is raised.
    * 404 -> ``self.login()`` is tried; on success the URL is fetched once
      more, otherwise ``Error404`` is raised.
    * anything else -> ``ErrorStatusCode`` is raised.

    The response fetched right after a successful ``login()`` is returned
    without its status code being inspected.
    """
    response = self._http_client.get(url)
    status = response.status_code

    if status == 200:
        return response

    if status == 302:
        location = response.headers['Location']
        user_verify_url = urljoin("http://qiye.qianzhan.com/", location)
        if self.do_verify(user_verify_url):
            # Verification passed: start over, with full status handling.
            return self._verify_get(url)
        if self.login():
            return self._http_client.get(url)
        raise Error302()

    if status == 403:
        raise Error403()

    if status == 404:
        if self.login():
            return self._http_client.get(url)
        raise Error404()

    raise ErrorStatusCode()
def _verify_post(self, url, data=None, json=None, times=0, headers=default_headers,
                 timeout=download_timeout):
    """Issue a POST through ``self._http_client`` and return it only on HTTP 200.

    NOTE: ``headers`` is sent exactly as given; no User-Agent or proxy
    authorization entry is added here.

    :param url: address to post to.
    :param data: forwarded to the client's ``post`` as ``data``.
    :param json: forwarded to the client's ``post`` as ``json``.
    :param times: accepted for interface compatibility; not used in this body.
    :param headers: request headers, forwarded unchanged.
    :param timeout: forwarded to the client's ``post`` call.
    :return: the response object when its status code is 200.
    :raises Error302: status 302 (the ``Location`` header is logged first).
    :raises Error403: status 403.
    :raises Error404: status 404.
    :raises Error502: status 502.
    :raises Error503: status 503.
    :raises ErrorStatusCode: any other status; constructed with the status code.
    """
    # The former ``try/except Error403: raise err`` wrapper only re-raised the
    # same exception, so errors now propagate directly with their traceback.
    response = self._http_client.post(url=url, data=data, json=json,
                                      headers=headers, timeout=timeout)
    status = response.status_code

    if status == 200:
        logging.debug(response.headers)
    elif status == 302:
        location = response.headers['Location']
        logging.debug("location: %s", location)
        raise Error302()
    elif status == 403:
        raise Error403()
    elif status == 404:
        raise Error404()
    elif status == 502:
        raise Error502()
    elif status == 503:
        raise Error503()
    else:
        raise ErrorStatusCode(status)
    return response
def _verify_post(self, url, data=None, json=None, times=0):
    """POST to ``url`` and return the response only on HTTP 200, retrying client errors.

    When ``HttpClientError`` is raised, the call is repeated with ``times``
    incremented until ``times`` reaches 3; starting from the default
    ``times=0`` that is three attempts in total.  The last error is then
    re-raised unchanged.

    :param url: address to post to.
    :param data: passed positionally to the client's ``post``.
    :param json: passed positionally to the client's ``post``.
    :param times: number of attempts that have already failed.
    :return: the response object when its status code is 200.
    :raises Error302: status 302 (the ``Location`` header is logged first).
    :raises Error403: status 403.
    :raises Error404: status 404.
    :raises Error502: status 502.
    :raises Error503: status 503.
    :raises ErrorStatusCode: any other status; constructed with the status code.
    :raises HttpClientError: once the retry budget is used up.
    """
    try:
        response = self._http_client.post(url, data, json)
        status = response.status_code
        # NOTE(review): the status errors are raised inside the ``try``, so
        # they are retried as well if they derive from HttpClientError --
        # confirm against the exception hierarchy.
        if status == 200:
            pass
        elif status == 302:
            location = response.headers['Location']
            logging.debug("location: %s", location)
            raise Error302()
        elif status == 403:
            raise Error403()
        elif status == 404:
            raise Error404()
        elif status == 502:
            raise Error502()
        elif status == 503:
            raise Error503()
        else:
            raise ErrorStatusCode(status)
        return response
    except HttpClientError:
        times += 1
        if times < 3:
            return self._verify_post(url, data=data, json=json, times=times)
        # Bare ``raise`` keeps the original traceback intact.
        raise
def _verify_get(self, url, **kwargs):
    """GET ``url`` and return the response only when the server answers 200.

    Redirect following is switched off unless the caller passes its own
    ``allow_redirects`` value; every other keyword argument is forwarded to
    the client's ``get`` untouched.

    Raises ``Error302`` (after logging the ``Location`` header), ``Error403``
    or ``Error404`` for those status codes, and ``ErrorStatusCode`` for any
    other non-200 status.
    """
    if "allow_redirects" not in kwargs:
        kwargs["allow_redirects"] = False

    response = self._http_client.get(url, **kwargs)
    status = response.status_code

    if status == 200:
        return response
    if status == 302:
        location = response.headers['Location']
        logging.debug("location: %s" % location)
        raise Error302()
    if status == 403:
        raise Error403()
    if status == 404:
        raise Error404()
    raise ErrorStatusCode()
def _check_response(self, response):
    """Return ``response`` when its status is 200; raise a status error otherwise.

    On 200 the response headers are logged at DEBUG level.  On 302 the
    ``Location`` header is logged and ``Error302`` is raised.  403, 404, 502
    and 503 raise ``Error403``, ``Error404``, ``Error502`` and ``Error503``.
    Any other status raises ``ErrorStatusCode`` built with the status code.
    """
    status = response.status_code

    if status == 200:
        logging.debug(response.headers)
        return response

    if status == 302:
        location = response.headers['Location']
        logging.debug("location: %s" % location)
        raise Error302()
    if status == 403:
        raise Error403()
    if status == 404:
        raise Error404()
    if status == 502:
        raise Error502()
    if status == 503:
        raise Error503()
    raise ErrorStatusCode(status)
class SiteClient(object):
    """Small client that looks a company name up on Baidu Baike search."""

    def __init__(self):
        # A session is created and kept on the instance; note that
        # get_search() below issues its request with requests.get() and does
        # not go through this session.
        self._http_client = requests.Session()

    def get_search(self, company_name):
        """Query the Baidu Baike word search for ``company_name``.

        :param company_name: text appended verbatim to the ``word=`` query
            parameter; no quoting is applied here.
        :return: the response when its status code is 200, or ``None`` when
            the request itself raised (the error is logged at INFO level).
        :raises Error500: status 500.
        :raises ErrorStatusCode: any other non-200 status; constructed with
            the status code.
        """
        url = "http://baike.baidu.com/search/word?word=" + company_name
        try:
            response = requests.get(url, timeout=5)
        except Exception as e:
            # Best-effort lookup: a failed request is reported and turned into
            # ``None``.  The exception itself is logged because ``e.message``
            # is deprecated and does not exist on Python 3.
            logging.info("%s", e)
            return None

        if response.status_code == 200:
            return response
        elif response.status_code == 500:
            raise Error500()
        else:
            raise ErrorStatusCode(response.status_code)