def configure_http_session(size=20, max_retries=1, _session=None):
    """
    Build a :class:`requests.Session` whose http and https traffic share a
    single :class:`requests.adapters.HTTPAdapter` (connection pool).

    The returned session follows at most one redirect and verifies
    certificates against the bundle reported by ``certifi.where()``.

    :param size: Used for both the number of pools and the pool max size.
    :type size: int
    :param max_retries: The maximum number of retries for each connection.
    :type max_retries: int
    :param _session: Test-only hook; when given it is returned untouched.
    """
    if _session is not None:
        return _session

    pooled_adapter = HTTPAdapter(
        max_retries=max_retries,
        pool_maxsize=size,
        pool_connections=size,
    )

    session = Session()
    for scheme in ("http://", "https://"):
        session.mount(scheme, pooled_adapter)

    session.max_redirects = 1
    session.verify = certifi.where()
    return session
def create_session():
    """
    Create a ``Session`` that sends a configurable User-Agent and uses an
    adapter with ``max_retries=3`` for both http and https.

    The User-Agent is read from the ``USER_AGENT`` key of
    ``current_app.config``; a desktop Chrome string is the fallback.
    """
    fallback_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
    retrying_adapter = HTTPAdapter(max_retries=3)

    session = Session()
    session.headers["User-Agent"] = current_app.config.get("USER_AGENT", fallback_agent)

    # Both schemes share the same adapter instance.
    for prefix in ("http://", "https://"):
        session.mount(prefix, retrying_adapter)
    return session
class BaseRequest(object):
    """
    Small HTTP helper around a ``Session`` that performs GET/POST requests
    and re-raises request failures as vendor-specific exceptions.
    """

    def __init__(self, session=None):
        """
        :param session: Existing ``Session`` to reuse. Any value that is not
            a ``Session`` instance (including ``None``) is ignored and a new
            session is created with ``max_retries=1`` and
            ``pool_maxsize=50`` adapters.
        """
        if isinstance(session, Session):
            self.session = session
        else:
            self.session = Session()
            # Adapters are looked up by URL prefix, so each scheme needs its
            # own mount; mounting only 'http://' would leave https requests
            # on the session's default adapter.
            self.session.mount('http://', HTTPAdapter(max_retries=1, pool_maxsize=50))
            self.session.mount('https://', HTTPAdapter(max_retries=1, pool_maxsize=50))

    @set_default
    def get(self, url, params, timeout=1, callback=None, **kwargs):
        """
        Send a GET request.

        :param url: Target URL.
        :param params: Query-string parameters.
        :param timeout: Timeout forwarded to the session call.
        :param callback: Optional callable invoked with the response.
        :param kwargs: Extra keyword arguments forwarded to ``session.get``.
        :return: The response object.
        :raises VendorConnectionError: on ``ConnectionError`` or ``Timeout``.
        :raises VendorHTTPError: on ``HTTPError`` (error status).
        :raises VendorRequestError: on any other ``RequestException``.
        """
        with self.catch_exception():
            r = self._get_result(url, params, timeout, **kwargs)
        if callable(callback):
            callback(r)
        return r

    @set_default
    def post(self, url, data, timeout=1, callback=None, **kwargs):
        """
        Send a POST request.

        :param url: Target URL.
        :param data: Request body passed to ``session.post``.
        :param timeout: Timeout forwarded to the session call.
        :param callback: Optional callable invoked with the response.
        :param kwargs: Extra keyword arguments forwarded to ``session.post``.
        :return: The response object.
        :raises VendorConnectionError: on ``ConnectionError`` or ``Timeout``.
        :raises VendorHTTPError: on ``HTTPError`` (error status).
        :raises VendorRequestError: on any other ``RequestException``.
        """
        with self.catch_exception():
            r = self._post_result(url, data, timeout, **kwargs)
        if callable(callback):
            callback(r)
        return r

    def _get_result(self, url, params, timeout, **kwargs):
        """Perform the GET and raise for 4xx/5xx statuses."""
        r = self.session.get(url, params=params, timeout=timeout, **kwargs)
        r.raise_for_status()
        return r

    def _post_result(self, url, data, timeout, **kwargs):
        """Perform the POST and raise for 4xx/5xx statuses."""
        r = self.session.post(url, data, timeout=timeout, **kwargs)
        r.raise_for_status()
        return r

    @contextmanager
    def catch_exception(self):
        """
        Context manager translating request exceptions into vendor errors.

        The original exception is attached to the new one as ``data``.
        Order matters: the specific classes are tested before the generic
        ``RequestException``.
        """
        try:
            yield
        except (ConnectionError, Timeout) as err:
            raise VendorConnectionError(str(err), data=err)
        except HTTPError as err:
            raise VendorHTTPError(str(err), data=err)
        except RequestException as err:
            raise VendorRequestError(str(err), data=err)
def _run_request(self, request):
    """
    Executes HTTP GET request with timeout using the endpoint
    defined upon client creation.

    :param request: Query string appended to ``self.endpoint`` after "?".
    :return: The response object returned by the session.
    :raises HTTPError: from ``raise_for_status`` on an error status.
    """
    # The session is created per call, so close it when done; otherwise
    # its pooled connections are left open after every request.
    with Session() as session:
        session.mount("http://", HTTPAdapter(max_retries=self._TOTAL_RETRIES))
        session.mount("https://", HTTPAdapter(max_retries=self._TOTAL_RETRIES))
        result = session.get(self.endpoint + "?" + request,
                             headers=self._get_custom_headers(),
                             timeout=self.timeout)
    result.raise_for_status()
    return result
class HttpClient:
    """
    A high-reliability HTTP client built on top of the requests library.

    :param max_connect_retries: The maximum number of retries each connection
        should attempt. Note, this applies only to failed DNS lookups, socket
        connections and connection timeouts, never to requests where data has
        made it to the server.
    :param max_request_tries: The maximum times of tries each request should
        attempt.
    """

    def __init__(self, max_connect_retries=0, max_request_tries=0):
        # A falsy argument (0/None) selects the module-level default.
        self.timeout = DEFAULT_TIMOUT
        self.max_connect_retries = (max_connect_retries or DEFAULT_CONNECT_RETRIES)
        self.max_request_tries = max_request_tries or DEFAULT_REQUEST_TRIES
        self.session = Session()
        # Honour the configured connection retry count instead of a
        # hard-coded value; the other counts keep their fixed limit of 2.
        retries = Retry(connect=self.max_connect_retries,
                        read=2, status=2, redirect=2)
        self.session.mount('https://', HTTPAdapter(max_retries=retries))
        self.session.mount('http://', HTTPAdapter(max_retries=retries))

    def get(self, url, content_type='json', max_request_times=0, timeout=0):
        """
        GET ``url``, retrying until a non-empty payload is obtained.

        :param url: Target URL.
        :param content_type: ``'json'`` decodes the body with ``res.json()``;
            any other value returns ``res.text``.
        :param max_request_times: Per-call attempt limit; a falsy value
            falls back to ``self.max_request_tries``.
        :param timeout: Per-call timeout; a falsy value falls back to
            ``self.timeout``.
        :return: The decoded payload, or the last (falsy) value / ``None``
            when every attempt failed.
        """
        max_times = max_request_times or self.max_request_tries
        attempts = 0  # number of requests actually sent
        data = None
        while attempts < max_times:
            attempts += 1
            try:
                res = self.session.get(url, timeout=timeout or self.timeout)
                data = res.json() if content_type == 'json' else res.text
            except requests.exceptions.ConnectionError as e:
                # Connection-level failures are not retried here.
                print("socket连接错误或读取超时", e.__class__)
                break
            except Exception:
                # Deliberate best-effort: any other failure (e.g. an
                # undecodable body) just consumes one attempt.
                continue
            if data:
                break
        # Report the real number of attempts in both outcomes.
        if not data:
            print("尝试了{}次请求依然失败".format(attempts))
        else:
            print("尝试了{}次请求成功".format(attempts))
        self.session.close()
        return data

    def post(self, url):
        """Placeholder; not implemented."""
        pass
def get_requests_session() -> Session:
    """
    Create a session whose http and https adapters use a pool size of 25
    with ``pool_block=True``, to avoid `connection pool full` warnings.

    :return: requests session
    """
    pool_options = {"pool_connections": 25, "pool_maxsize": 25, "pool_block": True}

    session = Session()
    for scheme in ("http://", "https://"):
        # A separate adapter instance is mounted for each scheme.
        session.mount(scheme, HTTPAdapter(**pool_options))
    return session
def _run_request(self, request):
    """
    Executes HTTP GET request with timeout using the endpoint
    defined upon client creation.

    When ``self.proxy_server_name`` is set, https traffic is sent through
    that proxy (with ``:port`` appended when ``self.proxy_server_port`` is
    set).

    :param request: Query string appended to ``self.endpoint`` after "?".
    :return: The response object returned by the session.
    :raises HTTPError: from ``raise_for_status`` on an error status.
    """
    # The session is created per call, so close it when done; otherwise
    # its pooled connections are left open after every request.
    with Session() as session:
        if self.proxy_server_name is not None:
            proxy_server = self.proxy_server_name
            if self.proxy_server_port is not None:
                proxy_server = proxy_server + ":" + self.proxy_server_port
            session.proxies.update({'https': proxy_server})

        session.mount("http://", HTTPAdapter(max_retries=self._TOTAL_RETRIES))
        session.mount("https://", HTTPAdapter(max_retries=self._TOTAL_RETRIES))
        result = session.get(self.endpoint + "?" + request,
                             headers=self._get_custom_headers(),
                             timeout=self.timeout)
    result.raise_for_status()
    return result
class RequestsClient(HttpClient):
    """An implementation of HttpClient that uses Requests as its HTTP Client

    Attributes:
        timeout (int): The default timeout for all API requests.
        session (Session): The session used to send every request.

    """

    def __init__(self, timeout=60, cache=False, max_retries=None, verify=True):
        """The constructor.

        Args:
            timeout (float): The default global timeout(seconds).
            cache (bool): Accepted for interface compatibility; not used
                by this constructor.
            max_retries (int): Total number of retries for https requests.
                ``None`` leaves the session's default https adapter in
                place.
            verify: Stored as the session's ``verify`` setting.

        """
        self.timeout = timeout
        self.session = Session()

        # Retry(total=None) removes the total limit and, with no other
        # counts set, is never considered exhausted. Only install the
        # retrying adapter when an explicit limit was supplied.
        if max_retries is not None:
            retries = Retry(total=max_retries)
            self.session.mount('https://', HTTPAdapter(max_retries=retries))

        self.session.verify = verify

    def execute(self, request):
        """Execute a given HttpRequest to get a string response back

        Args:
            request (HttpRequest): The given HttpRequest to execute.

        Returns:
            HttpResponse: The response of the HttpRequest.

        """
        response = self.session.request(
            HttpMethodEnum.to_string(request.http_method),
            request.query_url,
            headers=request.headers,
            params=request.query_parameters,
            data=request.parameters,
            timeout=self.timeout)

        return self.convert_response(response, request)

    def convert_response(self, response, http_request):
        """Converts the Response object of the HttpClient into an
        HttpResponse object.

        Args:
            response (dynamic): The original response object.
            http_request (HttpRequest): The original HttpRequest object.

        Returns:
            HttpResponse: The converted HttpResponse object.

        """
        return HttpResponse(response.status_code, response.text,
                            response.headers, http_request)
class MWS(object):
    """ Base Amazon API class """

    # This is used to post/get to the different uris used by amazon per api
    # ie. /Orders/2011-01-01
    # All subclasses must define their own URI only if needed
    URI = "/"

    # The API version varies in most amazon APIs
    VERSION = "2009-01-01"

    # There seem to be some xml namespace issues. therefore every api subclass
    # is recommended to define its namespace, so that it can be referenced
    # like so AmazonAPISubclass.NS.
    # For more information see http://stackoverflow.com/a/8719461/389453
    NS = ''

    # Some APIs are available only to either a "Merchant" or "Seller"
    # the type of account needs to be sent in every call to the amazon MWS.
    # This constant defines the exact name of the parameter Amazon expects
    # for the specific API being used.
    # All subclasses need to define this if they require another account type
    # like "Merchant" in which case you define it like so.
    # ACCOUNT_TYPE = "Merchant"
    # Which is the name of the parameter for that specific account type.
    ACCOUNT_TYPE = "SellerId"

    def __init__(self, access_key, secret_key, account_id,
                 domain='https://mws.amazonservices.com', uri="", version=""):
        """
        Store the credentials and endpoint settings and create the session.

        An empty *uri* or *version* falls back to the class-level ``URI`` /
        ``VERSION``. When ``settings.RUNSCOPE_BUCKET_KEY`` is set, all
        requests are routed through runscope.
        """
        self.access_key = access_key
        self.secret_key = secret_key
        self.account_id = account_id
        self.domain = domain
        self.uri = uri or self.URI
        self.version = version or self.VERSION
        self.session = Session()

        bucket_key = getattr(settings, 'RUNSCOPE_BUCKET_KEY', None)
        if bucket_key:
            logger.info("Redirecting API calls for MWS to runscope")
            self.configure_runscope(bucket_key)

    def configure_runscope(self, bucket_key):
        """
        Configure all connections to be proxied through runscope for easier
        debugging and logging of all requests and responses. *bucket_key* is
        API for the bucket you want to use for all the request. Check Runscope
        for more details on that.

        A missing ``requests_runscope`` package is logged as an error and
        otherwise ignored.
        """
        try:
            from requests_runscope import RunscopeAdapter
        except ImportError:
            logger.error(
                "Could not import runscope adapter. Is requests-runscope "
                "installed? Try running pip install requests-runscope.")
        else:
            logger.info('Mounting runscope proxy adapter for bucket {}'.format(
                bucket_key))
            self.session.mount('https://', RunscopeAdapter(bucket_key))
            self.session.mount('http://', RunscopeAdapter(bucket_key))

    def _get_quote_params(self, params):
        """
        Return *params* as a query string: keys sorted, values UTF-8
        encoded and percent-quoted (only ``-_.~`` left unescaped), pairs
        joined with ``&``.
        """
        quoted_params = []
        for key in sorted(params):
            value = urllib.quote(unicode(params[key]).encode('utf-8'),
                                 safe='-_.~')
            quoted_params.append("{}={}".format(key, value))
        return '&'.join(quoted_params)

    def make_request(self, extra_data, method="GET", **kwargs):
        """
        Make request to Amazon MWS API with these parameters

        :param extra_data: Request-specific parameters; entries with empty
            values are dropped before signing.
        :param method: HTTP method used for the request and the signature.
        :param kwargs: ``extra_headers`` (dict merged into the headers) and
            ``body`` (request body, defaults to ``''``).
        :return: ``DictWrapper`` of the response, or ``DataWrapper`` when
            the body cannot be parsed as XML; the raw response is attached
            as ``.response``.
        :raises MWSError: when the server answers with an HTTP error status.
        """
        # Remove all keys with an empty value because
        # Amazon's MWS does not allow such a thing.
        extra_data = remove_empty(extra_data)

        params = {
            'AWSAccessKeyId': self.access_key,
            self.ACCOUNT_TYPE: self.account_id,
            'SignatureVersion': '2',
            'Timestamp': self.get_timestamp(),
            'Version': self.version,
            'SignatureMethod': 'HmacSHA256',
        }
        params.update(extra_data)
        logger.debug("Request Parameters: {}".format(params))

        request_description = self._get_quote_params(params)
        signature = self.calc_signature(method, request_description)

        logger.debug('Domain: {} URI: {}'.format(self.domain, self.uri))
        url = '%s%s?%s&Signature=%s' % (self.domain, self.uri,
                                        request_description,
                                        urllib.quote(signature))
        headers = {'User-Agent': 'python-amazon-mws/0.0.1 (Language=Python)'}
        headers.update(kwargs.get('extra_headers', {}))

        try:
            # Some might wonder as to why i don't pass the params dict as the
            # params argument to request. My answer is, here i have to get the
            # url parsed string of params in order to sign it, so if i pass the
            # params dict as params to request, request will repeat that step
            # because it will need to convert the dict to a url parsed string,
            # so why do it twice if i can just pass the full url :).
            response = self.session.request(method, url,
                                            data=kwargs.get('body', ''),
                                            headers=headers)
            response.raise_for_status()

            # When retrieving data from the response object, be aware that
            # response.content returns the content in bytes while response.text
            # calls response.content and converts it to unicode.
            data = response.content

            # I do not check the headers to decide which content structure to
            # serve simply because sometimes Amazon's MWS API returns XML
            # error responses with "text/plain" as the Content-Type.
            action = extra_data.get('Action')
            if not action.endswith('Result'):
                action = "{}Result".format(action)
            try:
                parsed_response = DictWrapper(data, action)
            except XMLError:
                parsed_response = DataWrapper(data, response.headers)

        # The ``as`` form is accepted by Python 2.6+ and Python 3, unlike
        # the comma form.
        except HTTPError as e:
            error = MWSError(unicode(e))
            error.response = e.response
            error.url = url
            logger.error("Received {} with message: {}".format(
                unicode(e), e.response.content))
            raise error

        # Store the response object in the parsed_response for quick access
        parsed_response.response = response
        logger.debug("Received response: {}".format(response.content))
        return parsed_response
def DebugSession(session=None):
    """
    Return a session whose https and http traffic goes through a
    ``DebugAdapter``.

    :param session: Session to modify in place; a new ``Session`` is
        created when omitted.
    :return: The session that was passed in, or the newly created one.
    """
    target = Session() if session is None else session
    # Each scheme gets its own adapter instance.
    for prefix in ('https://', 'http://'):
        target.mount(prefix, DebugAdapter())
    return target
from bs4 import BeautifulSoup
from requests.sessions import HTTPAdapter, Session
from requests import get
import pandas as pd
from time import sleep
from random import randrange
import re

# start
# Session with a retrying adapter for the main site.
# Adapters are matched against the start of the full URL, so they must be
# mounted on a scheme prefix; a bare path prefix never matches any request.
s = Session()
s.mount('http://', HTTPAdapter(max_retries=1000))
s.mount('https://', HTTPAdapter(max_retries=1000))

# Result columns collected by the scraper.
faculty = []
scholarship = []
state = []
total_value = []
link = []
missed = []

for tag in [['international', 'content_container_667868']]:
    # Random 1-3 second pause before each request.
    sleep(randrange(1, 4))
    url = 'http://www.monash.edu/students/scholarships/current/' + tag[0]
    # Go through the session so the retrying adapter is actually used.
    response = s.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # tag[1] is the id of the <div> holding the list of entries.
    n = soup.find('div', id=tag[1])
    t = n.find_all('li')
    response.close()
class PortalConnection(object):
    """
    Connection to HubSpot

    :param authentication_key: This can be either an :class:`APIKey` or an \
        :class:`OAuthKey` instance
    :param basestring change_source: The string passed to HubSpot as \
        ``auditId`` in the query string

    Instances can be used as context managers; the underlying session is
    closed on exit.

    """
    _API_URL = 'https://api.hubapi.com'

    def __init__(self, authentication_key, change_source):
        super(PortalConnection, self).__init__()

        self._authentication_handler = \
            _QueryStringAuthenticationHandler(authentication_key)
        self._change_source = change_source

        self._session = Session()
        self._session.headers['User-Agent'] = _USER_AGENT

        # Adapters are selected by matching URL prefix, longest first. A new
        # session already has adapters on 'http://' and 'https://', so an
        # adapter mounted on '' would never be chosen for the API URL.
        http_adapter = HTTPAdapter(max_retries=_HTTP_CONNECTION_MAX_RETRIES)
        self._session.mount('http://', http_adapter)
        self._session.mount('https://', http_adapter)

    def send_get_request(self, url_path, query_string_args=None):
        """
        Send a GET request to HubSpot

        :param basestring url_path: The URL path to the endpoint
        :param dict query_string_args: The query string arguments

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request('GET', url_path, query_string_args)

    def send_post_request(self, url_path, body_deserialization):
        """
        Send a POST request to HubSpot

        :param basestring url_path: The URL path to the endpoint
        :param dict body_deserialization: The request's body message \
            deserialized

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request(
            'POST',
            url_path,
            body_deserialization=body_deserialization,
        )

    def send_put_request(self, url_path, body_deserialization):
        """
        Send a PUT request to HubSpot

        :param basestring url_path: The URL path to the endpoint
        :param body_deserialization: The request's body message deserialized

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request(
            'PUT',
            url_path,
            body_deserialization=body_deserialization,
        )

    def send_delete_request(self, url_path):
        """
        Send a DELETE request to HubSpot

        :param basestring url_path: The URL path to the endpoint

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request('DELETE', url_path)

    def _send_request(
        self,
        method,
        url_path,
        query_string_args=None,
        body_deserialization=None,
    ):
        """
        Build and send the request, then decode and validate the response.

        ``auditId`` is always added to the query string. A JSON body and
        content-type header are sent only when *body_deserialization* is
        truthy.
        """
        url = self._API_URL + url_path

        query_string_args = query_string_args or {}
        query_string_args = dict(query_string_args, auditId=self._change_source)

        request_headers = \
            {'content-type': 'application/json'} if body_deserialization else {}

        if body_deserialization:
            request_body_serialization = json_serialize(body_deserialization)
        else:
            request_body_serialization = None

        response = self._session.request(
            method,
            url,
            params=query_string_args,
            auth=self._authentication_handler,
            data=request_body_serialization,
            headers=request_headers,
        )

        response_body_deserialization = \
            self._deserialize_response_body(response)
        return response_body_deserialization

    @classmethod
    def _deserialize_response_body(cls, response):
        """
        Return the decoded JSON body for a 200 response and ``None`` for
        statuses that carry no body.

        :raises HubspotUnsupportedResponseError: for any other status or a
            200 response that is not JSON.
        """
        cls._require_successful_response(response)

        if response.status_code == HTTP_STATUS_OK:
            # Only a response that carries a body has to be JSON; checking
            # earlier would reject empty-body statuses for lacking a
            # Content-Type.
            cls._require_json_response(response)
            response_body_deserialization = response.json()
        elif response.status_code in _HTTP_STATUS_CODES_WITH_EMPTY_BODIES:
            response_body_deserialization = None
        else:
            exception_message = \
                'Unsupported response status {}'.format(response.status_code)
            raise HubspotUnsupportedResponseError(exception_message)

        return response_body_deserialization

    @staticmethod
    def _require_successful_response(response):
        """
        Raise the matching Hubspot exception for 4xx and 5xx responses.

        4xx bodies are validated against the error schema and their
        ``message`` / ``requestId`` are passed to the exception.
        """
        if 400 <= response.status_code < 500:
            response_data = response.json()
            error_data = _HUBSPOT_ERROR_RESPONSE_SCHEMA(response_data)

            if response.status_code == HTTP_STATUS_UNAUTHORIZED:
                exception_class = HubspotAuthenticationError
            else:
                exception_class = HubspotClientError
            raise exception_class(
                error_data['message'],
                error_data['requestId'],
            )
        elif 500 <= response.status_code < 600:
            raise HubspotServerError(response.reason, response.status_code)

    @staticmethod
    def _require_json_response(response):
        """
        Raise ``HubspotUnsupportedResponseError`` unless the response's
        Content-Type (ignoring parameters such as charset) is
        ``application/json``.
        """
        content_type_header_value = response.headers.get('Content-Type')
        if not content_type_header_value:
            exception_message = 'Response does not specify a Content-Type'
            raise HubspotUnsupportedResponseError(exception_message)

        content_type = content_type_header_value.split(';')[0].lower()
        if content_type != 'application/json':
            exception_message = \
                'Unsupported response content type {}'.format(content_type)
            raise HubspotUnsupportedResponseError(exception_message)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._session.close()
class PortalConnection(object):
    """
    Connection to HubSpot

    :param authentication_key: This can be either an :class:`APIKey` or an \
        :class:`OAuthKey` instance
    :param basestring change_source: The string passed to HubSpot as \
        ``auditId`` in the query string

    Instances can be used as context managers; the underlying session is
    closed on exit.

    """
    _API_URL = 'https://api.hubapi.com'

    def __init__(self, authentication_key, change_source):
        super(PortalConnection, self).__init__()

        self._authentication_handler = \
            _QueryStringAuthenticationHandler(authentication_key)
        self._change_source = change_source

        self._session = Session()
        self._session.headers['User-Agent'] = _USER_AGENT

        # Adapters are selected by matching URL prefix, longest first. A new
        # session already has adapters on 'http://' and 'https://', so an
        # adapter mounted on '' would never be chosen for the API URL.
        http_adapter = HTTPAdapter(max_retries=_HTTP_CONNECTION_MAX_RETRIES)
        self._session.mount('http://', http_adapter)
        self._session.mount('https://', http_adapter)

    def send_get_request(self, url_path, query_string_args=None):
        """
        Send a GET request to HubSpot

        :param basestring url_path: The URL path to the endpoint
        :param dict query_string_args: The query string arguments

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request('GET', url_path, query_string_args)

    def send_post_request(self, url_path, body_deserialization):
        """
        Send a POST request to HubSpot

        :param basestring url_path: The URL path to the endpoint
        :param dict body_deserialization: The request's body message \
            deserialized

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request(
            'POST',
            url_path,
            body_deserialization=body_deserialization,
        )

    def send_put_request(self, url_path, body_deserialization):
        """
        Send a PUT request to HubSpot

        :param basestring url_path: The URL path to the endpoint
        :param body_deserialization: The request's body message deserialized

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request(
            'PUT',
            url_path,
            body_deserialization=body_deserialization,
        )

    def send_delete_request(self, url_path):
        """
        Send a DELETE request to HubSpot

        :param basestring url_path: The URL path to the endpoint

        :return: Decoded version of the ``JSON`` that HubSpot put in \
            the body of the response.

        """
        return self._send_request('DELETE', url_path)

    def _send_request(
        self,
        method,
        url_path,
        query_string_args=None,
        body_deserialization=None,
    ):
        """
        Build and send the request, then decode and validate the response.

        ``auditId`` is always added to the query string. A JSON body and
        content-type header are sent only when *body_deserialization* is
        truthy.
        """
        url = self._API_URL + url_path

        query_string_args = query_string_args or {}
        query_string_args = dict(query_string_args, auditId=self._change_source)

        request_headers = \
            {'content-type': 'application/json'} if body_deserialization else {}

        if body_deserialization:
            request_body_serialization = json_serialize(body_deserialization)
        else:
            request_body_serialization = None

        response = self._session.request(
            method,
            url,
            params=query_string_args,
            auth=self._authentication_handler,
            data=request_body_serialization,
            headers=request_headers,
        )

        response_body_deserialization = \
            self._deserialize_response_body(response)
        return response_body_deserialization

    @classmethod
    def _deserialize_response_body(cls, response):
        """
        Return the decoded JSON body for a 200 response (``None`` when the
        decoded body is falsy) and ``None`` for statuses that carry no
        body.

        :raises HubspotUnsupportedResponseError: for any other status or a
            200 response that is not JSON.
        """
        cls._require_successful_response(response)

        if response.status_code == HTTP_STATUS_OK:
            # The JSON requirement applies only to responses with a body.
            cls._require_json_response(response)
            response_body_deserialization = response.json() or None
        elif response.status_code in _HTTP_STATUS_CODES_WITH_EMPTY_BODIES:
            response_body_deserialization = None
        else:
            exception_message = \
                'Unsupported response status {}'.format(response.status_code)
            raise HubspotUnsupportedResponseError(exception_message)

        return response_body_deserialization

    @staticmethod
    def _require_successful_response(response):
        """
        Raise the matching Hubspot exception for 4xx and 5xx responses.

        4xx bodies are validated against the error schema and their
        ``message`` / ``requestId`` are passed to the exception.
        """
        if 400 <= response.status_code < 500:
            response_data = response.json()
            error_data = _HUBSPOT_ERROR_RESPONSE_SCHEMA(response_data)

            if response.status_code == HTTP_STATUS_UNAUTHORIZED:
                exception_class = HubspotAuthenticationError
            else:
                exception_class = HubspotClientError
            raise exception_class(
                error_data['message'],
                error_data['requestId'],
            )
        elif 500 <= response.status_code < 600:
            raise HubspotServerError(response.reason, response.status_code)

    @staticmethod
    def _require_json_response(response):
        """
        Raise ``HubspotUnsupportedResponseError`` unless the response's
        Content-Type (ignoring parameters such as charset) is
        ``application/json``.
        """
        content_type_header_value = response.headers.get('Content-Type')
        if not content_type_header_value:
            exception_message = 'Response does not specify a Content-Type'
            raise HubspotUnsupportedResponseError(exception_message)

        content_type = content_type_header_value.split(';')[0].lower()
        if content_type != 'application/json':
            exception_message = \
                'Unsupported response content type {}'.format(content_type)
            raise HubspotUnsupportedResponseError(exception_message)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._session.close()
class MWS(object):
    """ Base Amazon API class """

    # This is used to post/get to the different uris used by amazon per api
    # ie. /Orders/2011-01-01
    # All subclasses must define their own URI only if needed
    URI = "/"

    # The API version varies in most amazon APIs
    VERSION = "2009-01-01"

    # There seem to be some xml namespace issues. therefore every api subclass
    # is recommended to define its namespace, so that it can be referenced
    # like so AmazonAPISubclass.NS.
    # For more information see http://stackoverflow.com/a/8719461/389453
    NS = ''

    # Some APIs are available only to either a "Merchant" or "Seller"
    # the type of account needs to be sent in every call to the amazon MWS.
    # This constant defines the exact name of the parameter Amazon expects
    # for the specific API being used.
    # All subclasses need to define this if they require another account type
    # like "Merchant" in which case you define it like so.
    # ACCOUNT_TYPE = "Merchant"
    # Which is the name of the parameter for that specific account type.
    ACCOUNT_TYPE = "SellerId"

    def __init__(self, access_key, secret_key, account_id,
                 domain='https://mws.amazonservices.com', uri="", version=""):
        """
        Store the credentials and endpoint settings and create the session.

        An empty *uri* or *version* falls back to the class-level ``URI`` /
        ``VERSION``. When ``settings.RUNSCOPE_BUCKET_KEY`` is set, all
        requests are routed through runscope.
        """
        self.access_key = access_key
        self.secret_key = secret_key
        self.account_id = account_id
        self.domain = domain
        self.uri = uri or self.URI
        self.version = version or self.VERSION
        self.session = Session()

        bucket_key = getattr(settings, 'RUNSCOPE_BUCKET_KEY', None)
        if bucket_key:
            logger.info("Redirecting API calls for MWS to runscope")
            self.configure_runscope(bucket_key)

    def configure_runscope(self, bucket_key):
        """
        Configure all connections to be proxied through runscope for easier
        debugging and logging of all requests and responses. *bucket_key* is
        API for the bucket you want to use for all the request. Check Runscope
        for more details on that.

        A missing ``requests_runscope`` package is logged as an error and
        otherwise ignored.
        """
        try:
            from requests_runscope import RunscopeAdapter
        except ImportError:
            logger.error(
                "Could not import runscope adapter. Is requests-runscope "
                "installed? Try running pip install requests-runscope."
            )
        else:
            logger.info(
                'Mounting runscope proxy adapter for bucket {}'.format(
                    bucket_key
                )
            )
            self.session.mount('https://', RunscopeAdapter(bucket_key))
            self.session.mount('http://', RunscopeAdapter(bucket_key))

    def _get_quote_params(self, params):
        """
        Return *params* as a query string: keys sorted, values UTF-8
        encoded and percent-quoted (only ``-_.~`` left unescaped), pairs
        joined with ``&``.
        """
        quoted_params = []
        for key in sorted(params):
            value = urllib.quote(
                unicode(params[key]).encode('utf-8'),
                safe='-_.~'
            )
            quoted_params.append("{}={}".format(key, value))
        return '&'.join(quoted_params)

    def make_request(self, extra_data, method="GET", **kwargs):
        """
        Make request to Amazon MWS API with these parameters

        :param extra_data: Request-specific parameters; entries with empty
            values are dropped before signing.
        :param method: HTTP method used for the request and the signature.
        :param kwargs: ``extra_headers`` (dict merged into the headers) and
            ``body`` (request body, defaults to ``''``).
        :return: ``DictWrapper`` of the response, or ``DataWrapper`` when
            the body cannot be parsed as XML; the raw response is attached
            as ``.response``.
        :raises MWSError: when the server answers with an HTTP error status.
        """
        # Remove all keys with an empty value because
        # Amazon's MWS does not allow such a thing.
        extra_data = remove_empty(extra_data)

        params = {
            'AWSAccessKeyId': self.access_key,
            self.ACCOUNT_TYPE: self.account_id,
            'SignatureVersion': '2',
            'Timestamp': self.get_timestamp(),
            'Version': self.version,
            'SignatureMethod': 'HmacSHA256',
        }
        params.update(extra_data)
        logger.debug("Request Parameters: {}".format(params))

        request_description = self._get_quote_params(params)
        signature = self.calc_signature(method, request_description)

        logger.debug('Domain: {} URI: {}'.format(self.domain, self.uri))
        url = '%s%s?%s&Signature=%s' % (self.domain, self.uri,
                                        request_description,
                                        urllib.quote(signature))
        headers = {'User-Agent': 'python-amazon-mws/0.0.1 (Language=Python)'}
        headers.update(kwargs.get('extra_headers', {}))

        try:
            # Some might wonder as to why i don't pass the params dict as the
            # params argument to request. My answer is, here i have to get the
            # url parsed string of params in order to sign it, so if i pass the
            # params dict as params to request, request will repeat that step
            # because it will need to convert the dict to a url parsed string,
            # so why do it twice if i can just pass the full url :).
            response = self.session.request(
                method, url, data=kwargs.get('body', ''), headers=headers)
            response.raise_for_status()

            # When retrieving data from the response object, be aware that
            # response.content returns the content in bytes while response.text
            # calls response.content and converts it to unicode.
            data = response.content

            # I do not check the headers to decide which content structure to
            # serve simply because sometimes Amazon's MWS API returns XML
            # error responses with "text/plain" as the Content-Type.
            action = extra_data.get('Action')
            if not action.endswith('Result'):
                action = "{}Result".format(action)
            try:
                parsed_response = DictWrapper(data, action)
            except XMLError:
                parsed_response = DataWrapper(data, response.headers)

        # The ``as`` form is accepted by Python 2.6+ and Python 3, unlike
        # the comma form.
        except HTTPError as e:
            error = MWSError(unicode(e))
            error.response = e.response
            error.url = url
            logger.error(
                "Received {} with message: {}".format(
                    unicode(e), e.response.content
                )
            )
            raise error

        # Store the response object in the parsed_response for quick access
        parsed_response.response = response
        logger.debug("Received response: {}".format(response.content))
        return parsed_response
class SessionState:
    """
    Shared state for one run: an HTTP session, a ``SqliteDict`` cache with
    a Redis layer in front of it, and ``WikiSite`` objects keyed by domain.

    Usable as a context manager; leaving the context closes the HTTP
    session and the SQLite cache.
    """

    def __init__(self, cache_file: Path, cache_key: str, redis: str, user_requested=False):
        """
        :param cache_file: Location of the SQLite cache file.
        :param cache_key: Expected cache version key; also the prefix of
            every Redis key.
        :param redis: Host passed to ``Redis``.
        :param user_requested: When true the SQLite cache is not opened
            (or validated) eagerly, and sites get ``maxlag = None``.
        """
        self.user_requested = user_requested
        self._cache_file = cache_file
        self._cache_key = cache_key
        self._cache: Optional[SqliteDict] = None

        random.seed()
        self._session_key = random.randint(0, 999999)
        self._redis = Redis(host=redis)

        if not user_requested:
            self._open()
            stored_key = self._cache.get("_cache_key_", None)
            if self._cache_key != stored_key:
                # Cache belongs to a different key: drop the file and
                # start over with an empty cache stamped with our key.
                self._cache.close()
                self._cache: Optional[SqliteDict] = None
                self._cache_file.unlink()
                self._open()
                self._cache["_cache_key_"] = self._cache_key

        self.session = Session()
        retry_policy = Retry(total=3, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
        # noinspection PyTypeChecker
        self.session.mount('https://', HTTPAdapter(max_retries=retry_policy))

        self.sites = {}
        self.wikidata = Sparql()
        self.primary_site = self.get_site(primary_domain)

    def __enter__(self):
        return self

    def __exit__(self, typ, value, traceback):
        self.session.close()
        if self._cache is None:
            return
        self._cache.close()
        self._cache = None
        print(f'Closed SQL connection for {self._session_key} at {datetime.utcnow()}')

    def _open(self):
        """Open the SQLite cache if it is not open yet."""
        if self._cache is not None:
            return
        print(f'Opening SQL connection for {self._session_key} at {datetime.utcnow()}')
        self._cache_file.parent.mkdir(parents=True, exist_ok=True)
        self._cache = SqliteDict(self._cache_file, autocommit=True)

    def get_site(self, domain: Domain) -> WikiSite:
        """Return the ``WikiSite`` for *domain*, creating it on first use."""
        if domain in self.sites:
            return self.sites[domain]

        # noinspection PyTypeChecker
        new_site = WikiSite(domain, self.session, domain == primary_domain)
        if self.user_requested:
            new_site.maxlag = None
        self.sites[domain] = new_site
        return new_site

    def delete_cached_items(self, prefix: str) -> None:
        """Delete every SQLite cache entry whose key starts with *prefix*."""
        self._open()
        # Collect the keys first so the cache is not modified while its
        # keys are being iterated.
        doomed = {name for name in self._cache.keys() if name.startswith(prefix)}
        for name in doomed:
            del self._cache[name]

    def del_obj(self, key: str) -> Any:
        """Remove *key* from Redis and SQLite; return the SQLite value or None."""
        self._redis.delete(self.redis_key(key))
        self._open()
        print(f"%% del {key}")
        return self._cache.pop(key, None)

    def load_obj(self, key: str, default: Any = None) -> Any:
        """
        Return the value for *key*, preferring Redis.

        On a Redis miss the value (or *default*) is read from SQLite and
        written back to Redis before being returned.
        """
        redis_name = self.redis_key(key)
        serialized = self._redis.get(redis_name)
        if serialized is not None:
            return loads(serialized)

        self._open()
        print(f"%% load {key}")
        loaded = self._cache.get(key, default)
        self._redis.set(redis_name, dumps(loaded))
        return loaded

    def save_obj(self, key: str, value: Any):
        """Store *value* under *key* in SQLite, then in Redis."""
        self._open()
        print(f"%% save {key}")
        self._cache[key] = value
        self._redis.set(self.redis_key(key), dumps(value))

    def redis_key(self, key: str):
        """Return the Redis key for *key*: the cache key used as a prefix."""
        return self._cache_key + key
return data USER_AGENT_STR = ( ( "Raiden/{raiden}/DB:{raiden_db_version}/{python_implementation}/" "{python_version}/{system}/{architecture}/{distribution}" ) .format(**get_system_spec()) .replace(" ", "-") ) session = Session() session.headers["User-Agent"] = USER_AGENT_STR timeout_adapter = TimeoutHTTPAdapter(timeout=DEFAULT_HTTP_REQUEST_TIMEOUT) session.mount("http://", timeout_adapter) session.mount("https://", timeout_adapter) MAX_PATHS_QUERY_ATTEMPTS = 2 def get_pfs_info(url: str) -> PFSInfo: try: response = session.get(f"{url}/api/v1/info") infos = get_response_json(response) matrix_server_info = urlparse(infos["matrix_server"]) return PFSInfo( url=url, price=infos["price_info"], chain_id=infos["network_info"]["chain_id"],
def DebugSession(session=None):
    """
    Return a session whose https and http traffic goes through a
    ``DebugAdapter``.

    :param session: Session to modify in place; a new ``Session`` is
        created when omitted.
    :return: The session that was passed in, or the newly created one.
    """
    target = Session() if session is None else session
    # Each scheme gets its own adapter instance.
    for prefix in ('https://', 'http://'):
        target.mount(prefix, DebugAdapter())
    return target
class PutClient(object):
    """
    This is a simple HTTPClient wrapper which supports putMetricData operation on CloudWatch endpoints.

    Keyword arguments:
    region -- the region used for request signing.
    endpoint -- the endpoint used for publishing metric data
    credentials -- the AWSCredentials object containing access_key, secret_key or
                IAM Role token used for request signing
    connection_timeout -- the amount of time in seconds to wait for establishing server connection
    response_timeout -- the amount of time in seconds to wait for the server response
    """

    _LOGGER = get_logger(__name__)
    _DEFAULT_CONNECTION_TIMEOUT = 1
    _DEFAULT_RESPONSE_TIMEOUT = 3
    _TOTAL_RETRIES = 1
    _LOG_FILE_MAX_SIZE = 10 * 1024 * 1024

    def __init__(self, config_helper,
                 connection_timeout=_DEFAULT_CONNECTION_TIMEOUT,
                 response_timeout=_DEFAULT_RESPONSE_TIMEOUT):
        """
        Read region, endpoint, credentials, proxy and debug settings from
        *config_helper* and prepare the HTTP session.

        :raises PutClient.InvalidEndpointException: when the configured
            endpoint fails validation.
        """
        self.request_builder = RequestBuilder(
            config_helper.credentials,
            config_helper.region,
            config_helper.enable_high_resolution_metrics)
        self._validate_and_set_endpoint(config_helper.endpoint)
        # (connect timeout, read timeout) tuple passed to the session.
        self.timeout = (connection_timeout, response_timeout)
        self.proxy_server_name = config_helper.proxy_server_name
        self.proxy_server_port = config_helper.proxy_server_port
        self.debug = config_helper.debug
        self.config = config_helper
        self._prepare_session()

    def _prepare_session(self):
        """
        Create the session reused by all requests, configure the https
        proxy when one is set, and mount retrying adapters.
        """
        self.session = Session()
        if self.proxy_server_name is not None:
            proxy_server = self.proxy_server_name
            self._LOGGER.info("Using proxy server: " + proxy_server)
            if self.proxy_server_port is not None:
                proxy_server = proxy_server + ":" + self.proxy_server_port
                self._LOGGER.info("Using proxy server port: " + self.proxy_server_port)
            proxies = {'https': proxy_server}
            self.session.proxies.update(proxies)
        else:
            self._LOGGER.info("No proxy server is in use")
        self.session.mount("http://", HTTPAdapter(max_retries=self._TOTAL_RETRIES))
        self.session.mount("https://", HTTPAdapter(max_retries=self._TOTAL_RETRIES))

    def _validate_and_set_endpoint(self, endpoint):
        """
        Store *endpoint* when it starts with ``http://`` / ``https://`` or
        contains "localhost"; otherwise log and raise.

        :raises PutClient.InvalidEndpointException: for any other value.
        """
        # NOTE(review): "/*" means "zero or more slashes", so this only
        # checks the scheme prefix; it may have been meant as ".*/".
        pattern = re.compile("http[s]?://*/")
        if pattern.match(endpoint) or "localhost" in endpoint:
            self.endpoint = endpoint
        else:
            msg = "Provided endpoint '" + endpoint + "' is not a valid URL."
            self._LOGGER.error(msg)
            raise PutClient.InvalidEndpointException(msg)

    def put_metric_data(self, namespace, metric_list):
        """
        Publishes metric data to the endpoint with single namespace defined.
        It is consumers responsibility to ensure that all metrics in the metric list
        belong to the same namespace.

        Request failures are logged as warnings and not re-raised.

        :raises ValueError: when a metric's namespace differs from *namespace*.
        """
        if not self._is_namespace_consistent(namespace, metric_list):
            raise ValueError(
                "Metric list contains metrics with namespace different than the one passed as argument."
            )
        # Refresh the credentials on the builder and signer before signing.
        credentials = self.config.credentials
        self.request_builder.credentials = credentials
        self.request_builder.signer.credentials = credentials
        request = self.request_builder.create_signed_request(
            namespace, metric_list)
        try:
            self._run_request(request)
        except Exception as e:
            # Deliberate best-effort: publishing must not crash the caller.
            self._LOGGER.warning(
                "Could not put metric data using the following endpoint: '" +
                self.endpoint + "'. [Exception: " + str(e) + "]")
            self._LOGGER.warning("Request details: '" + request + "'")

    def _is_namespace_consistent(self, namespace, metric_list):
        """
        Checks if namespaces declared in MetricData objects in the metric list are consistent
        with the defined namespace.
        """
        # Compare by value: identity ("is") is not guaranteed to hold for
        # two equal strings, which would reject valid metric lists.
        for metric in metric_list:
            if metric.namespace != namespace:
                return False
        return True

    def _run_request(self, request):
        """
        Executes HTTP GET request with timeout using the endpoint defined upon client creation.

        In debug mode an equivalent curl command line is appended to a
        trace file in the temp directory first.

        :raises HTTPError: from ``raise_for_status`` on an error status.
        """
        if self.debug:
            file_path = gettempdir() + "/collectd_plugin_request_trace_log"
            # Start a fresh trace file once it exceeds the size limit.
            if os.path.isfile(file_path) and os.path.getsize(
                    file_path) > self._LOG_FILE_MAX_SIZE:
                os.remove(file_path)
            with open(file_path, "a") as logfile:
                logfile.write(
                    "curl -i -v -connect-timeout 1 -m 3 -w %{http_code}:%{http_connect}:%{content_type}:%{time_namelookup}:%{time_redirect}:%{time_pretransfer}:%{time_connect}:%{time_starttransfer}:%{time_total}:%{speed_download} -A \"collectd/1.0\" \'"
                    + self.endpoint + "?" + request + "\'")
                logfile.write("\n\n")

        result = self.session.get(self.endpoint + "?" + request,
                                  headers=self._get_custom_headers(),
                                  timeout=self.timeout)
        result.raise_for_status()
        return result

    def _get_custom_headers(self):
        """ Returns dictionary of HTTP headers to be attached to each request """
        return {"User-Agent": self._get_user_agent_header()}

    def _get_user_agent_header(self):
        """ Returns the plugin name and version used as User-Agent information """
        return PLUGIN_NAME + "/" + str(PLUGIN_VERSION)

    class InvalidEndpointException(Exception):
        """Raised when the configured endpoint fails validation."""
        pass
class Connection:
    """
    HTTP connection to the API rooted at ``api_url``.

    Every request goes through a single session that carries the
    ``User-Agent`` header, the authentication handler and the timeout given
    at construction time. The instance is a context manager; leaving the
    ``with`` block closes the underlying session.

    :param auth: Authentication handler passed as ``auth`` on every request
    :param timeout: Timeout passed on every request (``None`` for no timeout)
    :param str api_url: Root URL of the API; defaults to :attr:`_API_URL`

    """

    _API_URL = 'https://www.2degreesnetwork.com/api'

    def __init__(self, auth, timeout=None, api_url=None):
        super(Connection, self).__init__()

        self._api_url = api_url or self._API_URL
        self._authentication_handler = auth

        self._session = Session()
        self._session.headers['User-Agent'] = _USER_AGENT

        self._timeout = timeout

        http_adapter = HTTPAdapter(max_retries=_HTTP_CONNECTION_MAX_RETRIES)
        # A session picks the adapter with the longest matching prefix and
        # already has default adapters on "http://" and "https://". Mounting
        # on "" would therefore never be selected, so the retrying adapter
        # has to replace the defaults on both schemes.
        for scheme in ('http://', 'https://'):
            self._session.mount(scheme, http_adapter)

    def send_get_request(self, url, query_string_args=None):
        """
        Send a GET request

        :param str url: The URL or URL path to the endpoint
        :param dict query_string_args: The query string arguments
        :return: The response object, once it passed the status and
            content type checks.

        """
        return self._send_request('GET', url, query_string_args)

    def send_head_request(self, url, query_string_args=None):
        """
        Send a HEAD request

        :param str url: The URL or URL path to the endpoint
        :param dict query_string_args: The query string arguments
        :return: The response object, once it passed the status and
            content type checks.

        """
        return self._send_request('HEAD', url, query_string_args)

    def send_post_request(self, url, body_deserialization=None):
        """
        Send a POST request

        :param str url: The URL or URL path to the endpoint
        :param dict body_deserialization: The request's body message
            deserialized
        :return: The response object, once it passed the status and
            content type checks.

        """
        return self._send_request(
            'POST',
            url,
            body_deserialization=body_deserialization,
        )

    def send_put_request(self, url, body_deserialization):
        """
        Send a PUT request

        :param str url: The URL or URL path to the endpoint
        :param body_deserialization: The request's body message deserialized
        :return: The response object, once it passed the status and
            content type checks.

        """
        return self._send_request(
            'PUT',
            url,
            body_deserialization=body_deserialization,
        )

    def send_delete_request(self, url):
        """
        Send a DELETE request

        :param str url: The URL or URL path to the endpoint
        :return: The response object, once it passed the status and
            content type checks.

        """
        return self._send_request('DELETE', url)

    def _send_request(
        self,
        method,
        url,
        query_string_args=None,
        body_deserialization=None,
    ):
        """
        Issue ``method`` against ``url`` and validate the response.

        :param str method: The HTTP method name
        :param str url: Absolute URL under the API root, or a path that is
            appended to the API root
        :param dict query_string_args: The query string arguments
        :param body_deserialization: Body to serialize as JSON; a falsy
            value sends no body and no ``content-type`` header
        :return: The response object returned by the session
        :raises AuthenticationError: on a 401 response
        :raises AccessDeniedError: on a 403 response
        :raises NotFoundError: on a 404 response
        :raises ClientError: on any other 4xx response
        :raises ServerError: on a 5xx response
        :raises UnsupportedResponseError: on any status other than 200/204,
            or when a non-empty body is not declared as JSON

        """
        # Paths are relative to the API root; absolute URLs pass through.
        if not url.startswith(self._api_url):
            url = self._api_url + url

        query_string_args = query_string_args or {}

        if body_deserialization:
            request_headers = {'content-type': 'application/json'}
            request_body_serialization = json_serialize(body_deserialization)
        else:
            request_headers = {}
            request_body_serialization = None

        response = self._session.request(
            method,
            url,
            params=query_string_args,
            auth=self._authentication_handler,
            data=request_body_serialization,
            headers=request_headers,
            timeout=self._timeout,
        )

        self._require_successful_response(response)
        self._require_deserializable_response_body(response)

        return response

    @staticmethod
    def _require_successful_response(response):
        """
        Raise the exception matching a 4xx or 5xx status code.

        Responses with any other status code are accepted silently.

        """
        if 400 <= response.status_code < 500:
            if response.status_code == HTTPStatus.UNAUTHORIZED:
                exception_class = AuthenticationError
            elif response.status_code == HTTPStatus.FORBIDDEN:
                exception_class = AccessDeniedError
            elif response.status_code == HTTPStatus.NOT_FOUND:
                exception_class = NotFoundError
            else:
                exception_class = ClientError
            raise exception_class()
        elif 500 <= response.status_code < 600:
            raise ServerError(response.reason, response.status_code)

    @classmethod
    def _require_deserializable_response_body(cls, response):
        """
        Accept only 200/204 responses whose body, if any, is JSON.

        :raises UnsupportedResponseError: for any other status code, or
            when a non-empty body is not declared as JSON

        """
        if response.status_code in (HTTPStatus.OK, HTTPStatus.NO_CONTENT):
            # An empty body (e.g. 204, or a HEAD response) needs no check.
            if response.content:
                cls._require_json_response(response)
        else:
            exception_message = \
                'Unsupported response status {}'.format(response.status_code)
            raise UnsupportedResponseError(exception_message)

    @staticmethod
    def _require_json_response(response):
        """
        Require the ``Content-Type`` header to be ``application/json``.

        Parameters after ``;`` (such as the charset) and letter case are
        ignored in the comparison.

        :raises UnsupportedResponseError: when the header is missing or
            names another media type

        """
        content_type_header_value = response.headers.get('Content-Type')
        if not content_type_header_value:
            exception_message = 'Response does not specify a Content-Type'
            raise UnsupportedResponseError(exception_message)

        content_type = content_type_header_value.split(';')[0].lower()
        if content_type != 'application/json':
            exception_message = \
                'Unsupported response content type {}'.format(content_type)
            raise UnsupportedResponseError(exception_message)

    def __enter__(self):
        """Return the connection itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying session; exceptions are not suppressed."""
        self._session.close()
from py42._compat import urlparse
from py42.exceptions import Py42DeviceNotConnectedError
from py42.exceptions import Py42Error
from py42.exceptions import Py42FeatureUnavailableError
from py42.exceptions import raise_py42_error
from py42.response import Py42Response
from py42.services._auth import C42RenewableAuth
from py42.settings import debug
from py42.util import format_dict

# One transport adapter shared by both schemes mounted below. Per the
# requests HTTPAdapter parameters: pool_connections is the number of
# connection pools cached, pool_maxsize the number of connections kept per
# pool, and pool_block=True makes a caller wait for a free connection.
SESSION_ADAPTER = HTTPAdapter(pool_connections=200, pool_maxsize=4, pool_block=True)

# Module-level session created at import time.
ROOT_SESSION = Session()
ROOT_SESSION.mount(u"https://", SESSION_ADAPTER)
ROOT_SESSION.mount(u"http://", SESSION_ADAPTER)
# Plain assignment: the session's header mapping is replaced by this dict,
# so only these two headers remain set on the session itself.
ROOT_SESSION.headers = {
    u"Accept-Encoding": u"gzip, deflate",
    u"Connection": u"keep-alive",
}


class HostResolver(object):
    """Base class declaring the ``get_host_address`` hook."""

    def get_host_address(self):
        """Always raises ``NotImplementedError`` in this base class."""
        raise NotImplementedError()


class KnownUrlHostResolver(HostResolver):
    """Host resolver constructed with a host address given by the caller."""

    def __init__(self, host_address):
        # Stored unchanged. NOTE(review): no get_host_address override is
        # visible in this part of the file -- confirm one exists.
        self._host_address = host_address
def DebugSession(wrapped_session=None):
    """
    Build a new session whose HTTPS and HTTP traffic goes through debug adapters.

    A separate ``DebugAdapter`` is created for each scheme, each one
    constructed with ``wrapped_session``.

    :param wrapped_session: Value handed to every ``DebugAdapter``
    :return: The newly created session
    """
    debug_session = Session()
    for scheme in ('https://', 'http://'):
        debug_session.mount(scheme, DebugAdapter(wrapped_session))
    return debug_session
import numpy
import numpy as np
import pandas as pd
import requests
from frozendict import frozendict
from requests.adapters import HTTPAdapter
from requests.sessions import Session
from urllib3 import Retry

logger = logging.getLogger(__name__)

# Module-level session created at import time. The same adapter is mounted
# for both schemes; its Retry policy allows up to 3 retries of
# connection-related errors with a backoff factor of 0.5 between attempts.
session = Session()
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)


class CustomJsonEncoder(json.JSONEncoder):
    """JSON encoder adding support for dates, bytes and ``frozendict``."""

    def default(self, obj):
        """
        Convert ``obj`` into something the base encoder can serialize.

        The ``json`` module calls this hook only for objects it cannot
        serialize natively.
        """
        if (obj is None) or isinstance(obj, (str, int, float, bool, list, tuple)):
            # Delegates to the base implementation, which the json
            # documentation describes as raising TypeError.
            return json.JSONEncoder.default(self, obj)
        elif isinstance(obj, (datetime.datetime, datetime.date)):
            # Dates and datetimes are emitted as their str() form.
            return str(obj)
        elif isinstance(obj, bytes):
            # todo: base64
            # Raises UnicodeDecodeError when the bytes are not valid UTF-8.
            return obj.decode(encoding="utf-8")
        elif isinstance(obj, frozendict):
            return dict(obj)
        # NOTE(review): as visible here, any other type falls through and
        # returns None (encoded as null) -- confirm whether further branches
        # or a final delegation to the base class follow.
#
# url = 'http://www.monash.edu/students/scholarships/current/merit-academic-achievement'
# response = get(url)
# soup = BeautifulSoup(response.text, 'html.parser')
#
# n = soup.find('div', id="content_container_667807")
#
# # start
#
# faculty = []
# scholarship = []
# link = []
#
# t = n.find_all('li')
# for item in t:
#     faculty.append(item.find_parent().find_previous().text)
#     scholarship.append(item.a.text)
#     link.append(item.a.get('href'))
#
# table = pd.DataFrame({'scholarships': scholarship, 'faculty': faculty, 'link': link})
# print(table['link'][0])

# Fetch the scholarships page through a session whose adapter retries
# failed attempts, then print the page body.
url = 'http://www.monash.edu/students/scholarships/current/merit-academic-achievement'

s = Session()
s.mount(url, HTTPAdapter(max_retries=100))

# The request has to go through ``s``: the module-level ``get`` helper does
# not use this session, so the retrying adapter mounted above would never
# be consulted.
response = s.get(url)
print(response.text)