Exemplo n.º 1
0
 def _verify_get(self,
                 url,
                 times=0,
                 headers=default_headers,
                 refresh_ip=False,
                 timeout=download_timeout):
     """Issue a GET through ``self._http_client`` and validate the status.

     Args:
         url: Address to fetch.
         times: Not used by this method; kept for caller compatibility.
         headers: Base request headers. They are copied before the
             User-Agent is added, so neither the caller's mapping nor the
             shared default is modified.
         refresh_ip: Not used by this method; kept for caller compatibility.
         timeout: Timeout value forwarded to the HTTP client.

     Returns:
         The response object when the status code is 200.

     Raises:
         Error302: On status 302 (the Location header, if any, is logged).
         Error403: On status 403.
         Error404: On status 404.
         Error502: On status 502.
         ErrorStatusCode: On any other status; carries the status code.
     """
     # The default value is evaluated once at definition time and shared by
     # every call; updating it in place would leak state between requests.
     request_headers = dict(headers) if headers else {}
     request_headers['User-Agent'] = self._user_agent

     response = self._http_client.get(url,
                                      headers=request_headers,
                                      timeout=timeout)
     status = response.status_code
     if status == 200:
         logging.debug(response.headers)
         return response
     if status == 302:
         # .get() so a redirect without a Location header still surfaces as
         # Error302 instead of a KeyError.
         logging.debug("location: %s", response.headers.get('Location'))
         raise Error302()
     if status == 403:
         raise Error403()
     if status == 404:
         raise Error404()
     if status == 502:
         raise Error502()
     raise ErrorStatusCode(status)
 def _verify_get(self, url):
     """GET *url*, attempting verification or login when access is refused.

     Status handling:
       * 200 - the response is returned as is.
       * 302 - the Location header is resolved against
         "http://qiye.qianzhan.com/" and passed to ``self.do_verify``. If
         that succeeds the whole request is retried through this method;
         otherwise ``self.login`` is tried and, on success, the URL is
         fetched once more and that response is returned without a further
         status check. If both fail, Error302 is raised.
       * 403 - Error403 is raised.
       * 404 - ``self.login`` is tried; on success the URL is fetched once
         more and returned without a further status check, otherwise
         Error404 is raised.
       * anything else - ErrorStatusCode is raised.

     NOTE(review): the 302 branch presumably relies on the HTTP client not
     following redirects on its own - confirm how ``_http_client`` is set up.
     """
     first_reply = self._http_client.get(url)
     status = first_reply.status_code

     if status == 200:
         return first_reply

     if status == 403:
         raise Error403()

     if status == 302:
         redirect_target = first_reply.headers['Location']
         verify_url = urljoin("http://qiye.qianzhan.com/", redirect_target)
         if self.do_verify(verify_url):
             # Verification passed: start over with a fully checked request.
             return self._verify_get(url)
         if self.login():
             return self._http_client.get(url)
         raise Error302()

     if status == 404:
         if self.login():
             return self._http_client.get(url)
         raise Error404()

     raise ErrorStatusCode()
Exemplo n.º 3
0
    def _verify_post(self, url, data=None, json=None, times=0, headers=default_headers, timeout=download_timeout):
        """Issue a POST through ``self._http_client`` and validate the status.

        Args:
            url: Address to post to.
            data: Form body forwarded to the HTTP client.
            json: JSON body forwarded to the HTTP client.
            times: Not used by this method; kept for caller compatibility.
            headers: Request headers, forwarded unchanged.
            timeout: Timeout value forwarded to the HTTP client.

        Returns:
            The response object when the status code is 200.

        Raises:
            Error302: On status 302 (the Location header, if any, is logged).
            Error403: On status 403.
            Error404: On status 404.
            Error502: On status 502.
            Error503: On status 503.
            ErrorStatusCode: On any other status; carries the status code.
        """
        response = self._http_client.post(url=url, data=data, json=json, headers=headers, timeout=timeout)
        status = response.status_code
        if status == 200:
            logging.debug(response.headers)
            return response
        if status == 302:
            # .get() so a redirect without a Location header still surfaces
            # as Error302 instead of a KeyError.
            logging.debug("location: %s", response.headers.get('Location'))
            raise Error302()
        if status == 403:
            raise Error403()
        if status == 404:
            raise Error404()
        if status == 502:
            raise Error502()
        if status == 503:
            raise Error503()
        raise ErrorStatusCode(status)
Exemplo n.º 4
0
    def _verify_post(self, url, data=None, json=None, times=0):
        """POST to *url*, validate the status and retry on HttpClientError.

        Args:
            url: Address to post to.
            data: Form body, passed positionally to the HTTP client.
            json: JSON body, passed positionally to the HTTP client.
            times: Number of attempts already failed; callers normally leave
                this at 0. It is incremented on each HttpClientError.

        Returns:
            The response object when the status code is 200.

        Raises:
            Error302, Error403, Error404, Error502, Error503: On the
                corresponding status code.
            ErrorStatusCode: On any other status; carries the status code.
            HttpClientError: Re-raised once three attempts have failed.

        NOTE(review): the status checks sit inside the ``try`` on purpose, so
        status errors are retried too if they derive from HttpClientError -
        confirm against the exception hierarchy.
        """
        try:
            response = self._http_client.post(url, data, json)
            status = response.status_code
            if status == 200:
                return response
            if status == 302:
                # .get() so a redirect without a Location header still
                # surfaces as Error302 instead of a KeyError.
                logging.debug("location: %s", response.headers.get('Location'))
                raise Error302()
            if status == 403:
                raise Error403()
            if status == 404:
                raise Error404()
            if status == 502:
                raise Error502()
            if status == 503:
                raise Error503()
            raise ErrorStatusCode(status)
        except HttpClientError:
            times += 1
            if times < 3:
                return self._verify_post(url, data=data, json=json, times=times)
            # Bare raise keeps the original traceback of the last failure.
            raise
Exemplo n.º 5
0
 def _verify_get(self, url, **kwargs):
     """GET *url* via ``self._http_client`` and validate the status code.

     ``allow_redirects`` is set to False unless the caller supplies it;
     all keyword arguments are forwarded to the client's ``get``.

     Returns the response on status 200. Raises Error302 (after logging the
     Location header), Error403 or Error404 for those status codes, and
     ErrorStatusCode for any other status.
     """
     # Only fill in the default; an explicit caller value wins.
     # Presumably this makes the client report a 302 rather than follow it.
     if "allow_redirects" not in kwargs:
         kwargs["allow_redirects"] = False

     reply = self._http_client.get(url, **kwargs)
     status = reply.status_code

     if status == 200:
         return reply
     if status == 302:
         target = reply.headers['Location']
         logging.debug("location: %s" % target)
         raise Error302()
     if status == 403:
         raise Error403()
     if status == 404:
         raise Error404()
     raise ErrorStatusCode()
Exemplo n.º 6
0
 def _check_response(self, response):
     """Return *response* if its status is 200, otherwise raise.

     The response headers are logged at debug level on success. A 302 logs
     the Location header and raises Error302; 403, 404, 502 and 503 raise
     the matching ErrorNNN; any other status raises ErrorStatusCode with
     the status code as its argument.
     """
     status = response.status_code

     if status == 200:
         logging.debug(response.headers)
         return response

     if status == 302:
         target = response.headers['Location']
         logging.debug("location: %s" % target)
         raise Error302()

     # Status codes that map directly to a dedicated exception type.
     dedicated = {
         403: Error403,
         404: Error404,
         502: Error502,
         503: Error503,
     }
     if status in dedicated:
         raise dedicated[status]()

     raise ErrorStatusCode(status)
Exemplo n.º 7
0
class SiteClient(object):
    """Client that searches Baidu Baike (baike.baidu.com) by company name."""

    def __init__(self):
        # Session owned by this client. NOTE(review): get_search currently
        # calls requests.get directly and does not go through this session.
        self._http_client = requests.Session()

    def get_search(self, company_name):
        """Query the Baike word search for *company_name*.

        Args:
            company_name: Text appended verbatim to the ``word=`` query
                parameter of the search URL.

        Returns:
            The response object when the status code is 200, or None when
            the request itself fails (the failure is logged at info level).

        Raises:
            Error500: When the server answers with status 500.
            ErrorStatusCode: On any other non-200 status; carries the
                status code.
        """
        url = "http://baike.baidu.com/search/word?word=" + company_name
        try:
            response = requests.get(url, timeout=5)
        except Exception as e:
            # Deliberate best effort: a failed request is reported as None
            # rather than propagated. The exception is logged directly
            # because ``e.message`` is not available on Python 3.
            logging.info("%s", e)
            return None

        status = response.status_code
        if status == 200:
            return response
        if status == 500:
            raise Error500()
        raise ErrorStatusCode(status)