def _requests_retry_session(
    retries=5,
    backoff_factor=7,
    status_forcelist=(408, 500, 502, 503, 504),
    session=None,
):
    """Return a ``requests`` session that retries failed HTTP(S) requests.

    :param retries: Retry budget; used for the total, read and connect counters.
    :param backoff_factor: Backoff factor handed to ``Retry``.
    :param status_forcelist: HTTP status codes that trigger a retry. The default
        is an immutable tuple so it cannot be shared and mutated across calls.
    :param session: Existing session to configure; a new one is created when
        omitted.
    :return: The session with one retrying adapter mounted for both schemes.
    """
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
def requests_retry_session(retries=10, backoff_factor=3, status_forcelist=(500, 502, 504, 429), session=None):
    """Build (or configure) a session that retries failed requests.

    The ``retries`` budget is applied to the total, read, connect and status
    counters of the retry policy, and the resulting adapter is mounted for
    both ``http://`` and ``https://``.

    Pattern taken from:
    https://dev.to/ssbozy/python-requests-with-retries-4p03
    https://www.peterbe.com/plog/best-practice-with-retries-with-requests
    """
    if not session:
        session = requests.Session()

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        status=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session
def _get_http_response_with_retries(method, url, max_retries, backoff_factor, retry_codes, **kwargs): """ Performs an HTTP request using Python's `requests` module with an automatic retry policy. :param method: a string indicating the method to use, e.g. "GET", "POST", "PUT". :param url: the target URL address for the HTTP request. :param max_retries: Maximum total number of retries. :param backoff_factor: a time factor for exponential backoff. e.g. value 5 means the HTTP request will be retried with interval 5, 10, 20... seconds. A value of 0 turns off the exponential backoff. :param retry_codes: a list of HTTP response error codes that qualifies for retry. :param kwargs: Additional keyword arguments to pass to `requests.Session.request()` :return: requests.Response object. """ assert 0 <= max_retries < 10 assert 0 <= backoff_factor < 120 retry_kwargs = { "total": max_retries, "connect": max_retries, "read": max_retries, "redirect": max_retries, "status": max_retries, "status_forcelist": retry_codes, "backoff_factor": backoff_factor, } if Version(urllib3.__version__) >= Version("1.26.0"): retry_kwargs["allowed_methods"] = None else: retry_kwargs["method_whitelist"] = None retry = Retry(**retry_kwargs) adapter = HTTPAdapter(max_retries=retry) with requests.Session() as http: http.mount("https://", adapter) http.mount("http://", adapter) response = http.request(method, url, **kwargs) return response
def load(endpoint):
    """Download every page of a paginated listing and return it as a DataFrame.

    The first response's ``Link`` header is inspected for the ``rel="last"``
    page number; every page from 1 to that number is then fetched. For each
    item the first ``dcterms:identifier`` value and the ``@id`` URL are kept.

    Side effects: rows whose ``id`` is duplicated are written to
    ``data-out/duplicated-omeka.csv`` (only when there are any), and the
    de-duplicated frame is written to the path in the ``OMEKA`` environment
    variable.

    :param endpoint: URL of the paginated listing.
    :return: DataFrame with columns ``id`` and ``omeka_url``, de-duplicated on ``id``.
    """
    retry_options = {"total": 3, "status_forcelist": [429, 500, 502, 503, 504]}
    retryable_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        retry_strategy = Retry(allowed_methods=retryable_methods, **retry_options)
    except TypeError:
        # urllib3 < 1.26 only knows the keyword under its old name.
        retry_strategy = Retry(method_whitelist=retryable_methods, **retry_options)
    adapter = HTTPAdapter(max_retries=retry_strategy)

    identifiers = []
    omeka_urls = []
    # The context manager releases pooled connections even if a request fails.
    with requests.Session() as http:
        http.mount("https://", adapter)
        http.mount("http://", adapter)

        first_response = http.get(endpoint)

        # find out number of pages; without a rel="last" link assume a single page
        last_page_matches = re.findall(
            r'\d+(?=>; rel="last)', first_response.headers.get("Link", "")
        )
        last_page = int(last_page_matches[0]) if last_page_matches else 1

        # loop over pages
        for page in tqdm(range(1, last_page + 1)):
            items = http.get(endpoint, params={"page": page}).json()
            for item in items:
                identifiers.append(item["dcterms:identifier"][0]["@value"])
                omeka_urls.append(item["@id"])
            sleep(0.5)

    # create dataframes
    omeka_df = pd.DataFrame({"id": identifiers, "omeka_url": omeka_urls})
    omeka_duplicated = omeka_df[omeka_df.duplicated(subset="id")]
    if len(omeka_duplicated) > 0:
        omeka_duplicated.to_csv("data-out/duplicated-omeka.csv")
    omeka_df.drop_duplicates(subset="id", inplace=True)
    omeka_df.to_csv(os.environ["OMEKA"])
    return omeka_df
def __init__(self, input, query, log, callback=None, rampage_type=None, conf=None, **kwargs):
    """Initialise both base classes, store the job parameters and prepare the HTTP session.

    ``captcha_answer`` may be supplied through ``kwargs``; it is stored only
    when present. When ``conf`` is falsy, ``SciHubEVA.conf`` is loaded.
    Session headers and the retry count come from the ``network`` section of
    the configuration.
    """
    QObject.__init__(self)
    threading.Thread.__init__(self)

    self._input = input
    self._query = query
    self.log = log
    self._callback = callback
    self._rampage_type = rampage_type

    # Captcha answer, used only when rampage_type == SciHubRampageType.PDF_CAPTCHA_RESPONSE
    if 'captcha_answer' in kwargs:
        self._captcha_answer = kwargs['captcha_answer']

    self._conf = conf if conf else SciHubConf('SciHubEVA.conf')

    self._sess = requests.Session()
    session_header = self._conf.get('network', 'session_header')
    self._sess.headers = json.loads(session_header)

    retry_times = self._conf.getint('network', 'retry_times')
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(total=retry_times, read=retry_times, connect=retry_times)
    )
    for scheme in ('http://', 'https://'):
        self._sess.mount(scheme, retrying_adapter)

    self._set_http_proxy()

    self._doi_pattern = r'\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'])\S)+)\b'
    self._illegal_filename_pattern = r'[\/\\\:\*\?\"\<\>\|]'
def diff_response(args: Tuple[str, str]):
    """Compare the CPE responses of the old and the new server for one vendor/product.

    ``/cpes/<vendor>/<product>`` is requested from ``127.0.0.1:1325`` (old) and
    ``127.0.0.1:1326`` (new) through a session that retries on 503/504. When the
    two JSON payloads differ (order ignored), a warning is logged and both
    payloads are written to ``integration/diff/cpes/<vendor>#<product>.old``
    and ``.new``.

    Connection errors and unexpected errors are logged and terminate the
    process with exit code 1. A read timeout is logged as a warning and the
    comparison for this pair is skipped.

    :param args: ``(vendor, product)`` pair.
    """
    # Endpoint
    # /cpes/:vendor/:product
    path = f'cpes/{args[0]}/{args[1]}'

    retries = Retry(total=5, backoff_factor=1, status_forcelist=[503, 504])

    try:
        # The requests must go through the session, otherwise the mounted
        # retry policy is never used.
        with requests.Session() as session:
            session.mount("http://", HTTPAdapter(max_retries=retries))
            response_old = session.get(
                f'http://127.0.0.1:1325/{path}', timeout=(2.0, 30.0)).json()
            response_new = session.get(
                f'http://127.0.0.1:1326/{path}', timeout=(2.0, 30.0)).json()
    except requests.exceptions.ConnectionError as e:
        logger.error(
            f'Failed to Connection..., err: {e}, {pprint.pformat({"args": args, "path": path}, indent=2)}'
        )
        exit(1)
    except requests.exceptions.ReadTimeout as e:
        logger.warning(
            f'Failed to Read Response..., err: {e}, {pprint.pformat({"args": args, "path": path}, indent=2)}'
        )
        # Nothing to compare: at least one response is missing.
        return
    except Exception as e:
        logger.error(
            f'Failed to GET request..., err: {e}, {pprint.pformat({"args": args, "path": path}, indent=2)}'
        )
        exit(1)

    diff = DeepDiff(response_old, response_new, ignore_order=True)
    if diff != {}:
        logger.warning(
            f'There is a difference between old and new(or RDB and Redis):\n {pprint.pformat({"args": args, "path": path}, indent=2)}'
        )

        diff_path = f'integration/diff/cpes/{args[0]}#{args[1]}'
        with open(f'{diff_path}.old', 'w') as w:
            w.write(json.dumps(response_old, indent=4))
        with open(f'{diff_path}.new', 'w') as w:
            w.write(json.dumps(response_new, indent=4))
def __init__(self, url='', headers=None, proxy=None, debug=False):
    """Store the connection settings and open a session that retries HTTPS requests.

    ``headers`` is merged into the session headers only when it is a dict.
    The retry policy (5 attempts, backoff factor 1, statuses 502/503/504) is
    mounted for ``https://`` only. With ``debug`` enabled the API URL is
    printed and DEBUG logging is switched on for the root logger and for
    ``requests.packages.urllib3``.
    """
    # Import Proxy Server settings
    self.proxies = proxy
    self.url = url
    self.debug = debug

    # Start the session
    self.sessions = session = requests.Session()
    if isinstance(headers, dict):
        session.headers.update(headers)
    retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    session.mount('https://', HTTPAdapter(max_retries=retry_policy))

    # Debugging
    if not self.debug:
        return
    print('API URL: %s' % self.url)
    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)
    urllib3_log = logging.getLogger("requests.packages.urllib3")
    urllib3_log.setLevel(logging.DEBUG)
    urllib3_log.propagate = True
def download(self):
    """Download the DEMNAS tile of every feature in ``self._data['features']``.

    Tiles are stored as ``<cwd>/downloaded/<pulau>/DEMNAS_<nomor_peta>_v1.0.tif``;
    tiles that already exist on disk are skipped. The tile number is read from
    the ``NAMOBJ`` property for ``nusa_tenggara`` and from ``NOMOR_PETA``
    otherwise.

    :return: A failure message as soon as a freshly downloaded file is smaller
        than 5000 bytes, otherwise a summary line with the feature count.
    """
    retry_mounted = False
    for features in self._data['features']:
        if self._pulau == 'nusa_tenggara':
            nomor_peta = features['properties']['NAMOBJ']
        else:
            nomor_peta = features['properties']['NOMOR_PETA']

        download_url = self._download_url+nomor_peta+'_v1.0.tif'
        folder = os.path.join(os.getcwd(), 'downloaded', self._pulau)
        target = os.path.join(folder, 'DEMNAS_'+nomor_peta+'_v1.0.tif')
        print(download_url)

        if not os.path.exists(folder):
            os.makedirs(folder)
        if os.path.exists(target):
            continue

        # Mount the retrying adapter once, not on every iteration.
        if not retry_mounted:
            retry = Retry(connect=3, backoff_factor=1)
            self._session.mount('http://', HTTPAdapter(max_retries=retry))
            retry_mounted = True

        # Fetch before opening the file: a failed request must not leave an
        # empty file behind, because existing files are skipped on later runs.
        response = self._session.get(download_url, cookies={"PHPSESSID": self._cookies["PHPSESSID"]})
        with open(target, "wb") as file:
            file.write(response.content)

        if os.path.getsize(target) < 5000:
            return 'Gagal Download. Cek login email dan password'

    return '============ TOTAL ('+str(len(self._data['features']))+') Finish! ============'
def _requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """
    Provide a session suited to calling an API: requests are retried according
    to a ``Retry`` policy built from the arguments. ``retries`` is used for the
    total, read and connect counters. A passed-in ``session`` is configured in
    place; otherwise a new one is created.
    """
    http_session = session if session else requests.Session()
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retry_adapter = HTTPAdapter(max_retries=retry_policy)
    for prefix in ('http://', 'https://'):
        http_session.mount(prefix, retry_adapter)
    return http_session
def requestsRetrySession(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """Return ``session`` (or a new session) with a retrying adapter on both schemes.

    ``retries`` feeds the total, read and connect counters of the retry policy.
    """
    if not session:
        session = requests.Session()

    adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
    )
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
def mdx_verify_server(self) -> bool:
    """Validate the README by posting it to the local MDX server.

    The server is started first when ``ReadMeValidator._MDX_SERVER_PROCESS`` is
    not set. The content returned by ``self.fix_mdx()`` is POSTed to
    ``http://localhost:6161`` with a 20 second timeout and up to 2 retries.

    :return: ``False`` when the server could not be started, or when the server
        answers with a non-200 status and ``self.handle_error`` reports the
        error as blocking; ``True`` otherwise.
    """
    if not ReadMeValidator._MDX_SERVER_PROCESS:
        server_started = ReadMeValidator.start_mdx_server(
            handle_error=self.handle_error, file_path=str(self.file_path))
        if not server_started:
            return False

    readme_content = self.fix_mdx()
    adapter = HTTPAdapter(max_retries=Retry(total=2))
    # Close the session (and its pooled connection) once the single request is done.
    with requests.Session() as session:
        session.mount('http://', adapter)
        response = session.request(
            'POST', 'http://localhost:6161',
            data=readme_content.encode('utf-8'), timeout=20)

    if response.status_code != 200:
        error_message, error_code = Errors.readme_error(response.text)
        if self.handle_error(error_message, error_code, file_path=self.file_path):
            return False
    return True
def requests_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """Return a session that retries GET/POST/PUT/DELETE/HEAD/OPTIONS requests.

    :param retries: Retry budget; used for the total, read and connect counters.
    :param backoff_factor: Backoff factor handed to ``Retry``.
    :param status_forcelist: HTTP status codes that trigger a retry.
    :param session: Existing session to configure; a new one is created when
        omitted.
    :return: The session with the retrying adapter mounted for both schemes.
    """
    session = session or requests.Session()

    # urllib3 does not retry non-idempotent methods such as POST by default,
    # so the methods to retry are listed explicitly.
    retry_methods = frozenset(
        ['GET', 'POST', 'PUT', 'DELETE', 'HEAD', 'OPTIONS'])
    retry_options = dict(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    try:
        retry = Retry(allowed_methods=retry_methods, **retry_options)
    except TypeError:
        # urllib3 < 1.26 only knows the keyword under its old name.
        retry = Retry(method_whitelist=retry_methods, **retry_options)

    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
def requests_retry_session(retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504), session=None):
    """
    Return a session whose HTTP and HTTPS adapters carry a retry policy.

    ``retries`` is used for the total, read and connect counters. A passed-in
    ``session`` is configured in place; otherwise a new one is created.

    Implementation based on
    https://www.peterbe.com/plog/best-practice-with-retries-with-requests
    """
    target_session = session or requests.Session()
    policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    shared_adapter = HTTPAdapter(max_retries=policy)
    target_session.mount('http://', shared_adapter)
    target_session.mount('https://', shared_adapter)
    return target_session
def test_retries(self) -> None:
    """Check the retry total configured on both adapters of each session."""
    # Responses doesn't support testing the low-level retry
    # functionality, so we can't test the retry itself easily.  :(
    # https://github.com/getsentry/responses/issues/135

    def check_total(session, expected_total) -> None:
        # Same assertion order as before: http first, then https.
        for prefix in ("http://", "https://"):
            self.assertEqual(session.adapters[prefix].max_retries.total, expected_total)

    # Defaults to no retries
    check_total(requests.Session(), 0)
    check_total(OutgoingSession(role="testing", timeout=1), 0)

    # An int and a Retry instance are both accepted for max_retries
    check_total(OutgoingSession(role="testing", timeout=1, max_retries=2), 2)
    check_total(OutgoingSession(role="testing", timeout=1, max_retries=Retry(total=5)), 5)
def test_request_response(self):
    """POST to the mock server through a retrying session, then GET it plainly.

    Only the plain GET response is asserted on; the POST response is
    overwritten.
    """
    url = 'http://localhost:{port}/users'.format(
        port=self.mock_server_port)

    # Implement retry mechanism
    retry_options = dict(total=5, backoff_factor=1, status_forcelist=[400])
    try:
        retry_strategy = Retry(allowed_methods=['POST'], **retry_options)
    except TypeError:
        # urllib3 < 1.26 only knows the keyword under its old name.
        retry_strategy = Retry(method_whitelist=['POST'], **retry_options)
    adapter = HTTPAdapter(max_retries=retry_strategy)

    # Close the session once the request is done so the test leaks no sockets.
    with requests.Session() as session:
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        response = session.post(url=url)

    # Normal version
    # Send a request to the mock API server and store the response.
    response = requests.get(url)

    # Confirm that the request-response cycle completed successfully.
    print(f"response: {response}")
    assert_true(response.ok)
def get_tags_list(url: str, query: str, pages_count: int, retries_count: int) -> List[str]:
    """
    Collect topic tags from the search result pages of `url` for `query`

    :param url: URL the search request is sent to
    :param query: Query for repos search
    :param pages_count: Number of result pages to walk (pages 1..pages_count)
    :param retries_count: Retry budget passed to `Retry(total=...)`; status-based
        retries are limited to HTTP 429
    :return: List with the stripped text of every matching tag link
    """
    tags_list = []
    topic_link_attrs = {'class': 'topic-tag topic-tag-link f6 px-2 mx-0'}
    retry = Retry(total=retries_count,
                  backoff_factor=2,
                  raise_on_status=True,
                  status_forcelist=[429])

    with requests.Session() as session:
        session.mount(prefix='https://', adapter=HTTPAdapter(max_retries=retry))

        for page_num in range(1, pages_count + 1):
            logger.debug(f'Page №{page_num} request')
            request_result = session.get(
                url=url,
                params={'p': page_num, 'q': query, 'type': 'Repositories', 's': 'stars'},
            )

            logger.debug(f'Page №{page_num} processing')
            soup = BeautifulSoup(request_result.content, 'lxml')
            tags_list.extend(
                tag.text.strip()
                for tag in soup.find_all('a', attrs=topic_link_attrs)
            )
            logger.debug(
                f'Tags count after page №{page_num} processing: {len(tags_list)}'
            )

    return tags_list
def requests_retry_session(retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504)):
    """Create a new session that retries requests with backoff.

    :param retries: Retry budget; used for the total, read and connect counters
    :type retries: int
    :param backoff_factor: Backoff factor handed to ``Retry``; relates to the wait
        between attempts: {backoff factor} * (2 ^ ({number of total retries} - 1))
    :type backoff_factor: float
    :param status_forcelist: Status codes that prompt a retry
    :type status_forcelist: tuple of ints
    """
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    new_session = requests.Session()
    for scheme in ('http://', 'https://'):
        new_session.mount(scheme, retrying_adapter)
    return new_session
def query_portals(context):
    """
    Query Cumulus Portals for all published items

    The endpoint is read from ``context.solid_config``. Two requests are sent,
    starting at index 0 and 55000, each asking for up to 55000 ``AssetRecords``
    matching the quick search string ``jpg``. The ``items`` of every response
    are flattened with ``pd.json_normalize`` and combined.

    :param context: object exposing the endpoint URL as ``solid_config``.
    :return: DataFrame holding the normalised items of all responses.
    """
    endpoint = context.solid_config

    retry_options = {"total": 3, "status_forcelist": [429, 500, 502, 503, 504]}
    # NOTE(review): the requests below are POSTs, which this list does not
    # include -- confirm whether POST should be retried as well.
    retryable_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        retry_strategy = Retry(allowed_methods=retryable_methods, **retry_options)
    except TypeError:
        # urllib3 < 1.26 only knows the keyword under its old name.
        retry_strategy = Retry(method_whitelist=retryable_methods, **retry_options)
    adapter = HTTPAdapter(max_retries=retry_strategy)

    API_STEPS = ["0", "55000"]
    frames = []

    # The context manager releases pooled connections even if a request fails.
    with requests.Session() as http:
        http.mount("https://", adapter)
        http.mount("http://", adapter)

        for start_index in tqdm(API_STEPS, desc="Steps"):
            payload = {
                "table": "AssetRecords",
                "quicksearchstring": "jpg",
                "maxreturned": "55000",
                "startindex": start_index,
            }
            params = urllib.parse.urlencode(payload, quote_via=urllib.parse.quote)
            response = http.post(endpoint, params=params)
            data = response.json()
            frames.append(pd.json_normalize(data["items"]))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(frames, ignore_index=True)
def send_session(self, prepared, stream=None):
    """Send a prepared request through the shared session, creating it on first use.

    On first use a session is created whose adapters retry any request method
    up to 5 times on the listed 5xx statuses without raising on the final
    status. The connection pool is sized to the sum of the values in
    ``self._thread_pools_names``.

    :param prepared: the prepared request to send.
    :param stream: forwarded to ``Session.send``.
    :return: the response returned by ``Session.send``.
    """
    if self.session is None:
        self.session = requests.Session()

        retry_options = dict(
            total=5,
            read=5,
            connect=5,
            backoff_factor=0.3,
            # force retry on those status responses
            status_forcelist=(501, 502, 503, 504, 505, 506, 507, 508, 510, 511),
            raise_on_status=False,
        )
        try:
            # use on any request type
            retry = Retry(allowed_methods=None, **retry_options)
        except TypeError:
            # urllib3 < 1.26 only knows the keyword under its old name.
            retry = Retry(method_whitelist=False, **retry_options)

        pool_size = np.sum(list(self._thread_pools_names.values()))
        adapter = HTTPAdapter(max_retries=retry,
                              pool_maxsize=pool_size,
                              pool_connections=pool_size)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)

    resp = self.session.send(request=prepared, stream=stream, verify=self.verify, timeout=None)

    with threadLock:
        self.calls_counter.add()

    return resp
def __init__(self, opts: Optional[Options] = None):
    """Create the session and keep a copy of the options.

    When ``opts`` is omitted, ``omnipath.options`` is used. A retrying adapter
    is mounted for both schemes only when ``num_retries`` is positive.

    :raises TypeError: if ``opts`` is not an ``Options`` instance.
    """
    if opts is None:
        from omnipath import options as opts

    if not isinstance(opts, Options):
        raise TypeError(
            f"Expected `opts` to be of type `Options`, found {type(opts).__name__}."
        )

    self._session = Session()
    self._options = copy(opts)  # this does not copy MemoryCache

    if self._options.num_retries > 0:
        retry_policy = Retry(
            total=self._options.num_retries,
            redirect=5,
            allowed_methods=["HEAD", "GET", "OPTIONS"],
            status_forcelist=[413, 429, 500, 502, 503, 504],
            backoff_factor=1,
        )
        retrying_adapter = HTTPAdapter(max_retries=retry_policy)
        for prefix in ("http://", "https://"):
            self._session.mount(prefix, retrying_adapter)

    logging.debug(f"Initialized `{self}`")
def __init__(self, *, api_key: Optional[str]):
    """Set up the root URL, default query parameter, headers and retry adapters.

    The root URL comes from the environment variable named by
    ``ROOT_URL_ENV_VARIABLE``, falling back to ``DEFAULT_ROOT_URL``.
    ``api_key`` is added as the ``key`` query parameter unless it is ``None``.
    """
    super().__init__()

    self.root_url = os.getenv(self.ROOT_URL_ENV_VARIABLE, self.DEFAULT_ROOT_URL)

    # Add the API key query parameter
    if api_key is not None:
        self.params.update({'key': api_key})  # type: ignore [union-attr]

    # Set the default headers; the user-agent carries the package name + version
    # and the system info
    package_name = __name__.split(".")[0]
    default_headers = {
        'Accept': 'application/json',
        'User-Agent': f'{package_name}/{__version__} ({platform.version()})',
    }
    self.headers.update(default_headers)

    # Configure retries: one adapter per scheme, both sharing the same policy
    retries = Retry(total=None, connect=5, backoff_factor=0.2)
    for scheme in ('https://', 'http://'):
        self.mount(scheme, requests.adapters.HTTPAdapter(max_retries=retries))
def http_download_binary_file(request_url, file_path, auth=None, headers=None, verify_peer_certificate=True,
                              proxies=None):
    """ Requests a HTTP url to save a file on the local filesystem.

    The response is streamed to ``file_path``. The session and the streamed
    response are both closed before returning, also on error.

    :param request_url: Requested HTTP URL.
    :param file_path: Local file path.
    :param auth: (optional) Auth tuple to use HTTP Auth (supported: Basic/Digest/Custom).
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param verify_peer_certificate: (optional) Flag to decide whether peer certificate has to be validated.
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :raises: ConfluenceException in the case of the server does not answer with HTTP code 200.
    """
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)

    with requests.Session() as session:
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        response = session.get(request_url, stream=True, auth=auth, headers=headers,
                               verify=verify_peer_certificate, proxies=proxies)
        try:
            if 200 == response.status_code:
                with open(file_path, 'wb') as downloaded_file:
                    # Let urllib3 undo any content encoding while streaming.
                    response.raw.decode_content = True
                    shutil.copyfileobj(response.raw, downloaded_file)
            else:
                raise ConfluenceException(
                    'Error %s: %s on requesting %s' % (response.status_code, response.reason, request_url))
        finally:
            # A streamed response keeps its connection until explicitly closed.
            response.close()
def reset_session(self):
    """Replace ``self.session`` with a fresh session that retries HTTPS requests."""
    self.session = requests.Session()
    self.retries = Retry(
        total=3,                  # number of retries
        backoff_factor=1,         # extends the wait when several retries occur
        status_forcelist=[500],   # status codes that trigger a retry
    )
    https_adapter = HTTPAdapter(max_retries=self.retries)
    self.session.mount("https://", https_adapter)
def get_request_session(max_retries=3):
    """Return a new session that retries requests answered with HTTP 500.

    :param max_retries: Total number of retries allowed per request. This value
        is now honoured; it was previously ignored in favour of a hard-coded 5.
    :return: A ``requests.Session`` with a separate retrying adapter mounted for
        ``http://`` and for ``https://``.
    """
    from urllib3.util import Retry

    session = requests.Session()
    for prefix in ("http://", "https://"):
        retry = Retry(total=max_retries, status_forcelist=[500])
        session.mount(prefix, requests.adapters.HTTPAdapter(max_retries=retry))
    return session
def __init__(self, horizon_uri=None, pool_size=DEFAULT_POOLSIZE, num_retries=DEFAULT_NUM_RETRIES,
             request_timeout=DEFAULT_REQUEST_TIMEOUT, backoff_factor=DEFAULT_BACKOFF_FACTOR, user_agent=None):
    """The :class:`Horizon` object, which represents the interface for
    making requests to a Horizon server instance.

    This class aims to be up to date with Horizon's API endpoints; however,
    you can utilize the internal session via ``self.session`` (which is a
    :class:`requests.Session` object) to make arbitrary requests to
    a Horizon instance's API.

    In general, on HTTP errors (non 2XX/3XX responses), no exception is
    raised, and the return dictionary must be checked to see if it is an
    error or a valid response. Any other errors however are raised by this
    class.

    :param str horizon_uri: The horizon base URL
    :param int request_timeout: The timeout for all requests.
    :param int pool_size: persistent connection to Horizon and connection pool
    :param int num_retries: configurable request retry functionality
    :param float backoff_factor: a backoff factor to apply between attempts after the second try
    :param dict user_agent: representing the user-agent you want,
        such as `{'X-Client-Name': 'py-stellar-base', 'X-Client-Version': __version__}`

    """
    # Always set the attribute: it is read below when the session headers
    # are configured, also when the caller supplies its own user agent.
    self.user_agent = USER_AGENT if user_agent is None else user_agent

    if horizon_uri is None:
        self.horizon_uri = HORIZON_TEST
    else:
        self.horizon_uri = horizon_uri

    self.pool_size = pool_size
    self.num_retries = num_retries
    self.request_timeout = request_timeout
    self.backoff_factor = backoff_factor

    # adding 504 to the tuple of statuses to retry
    self.status_forcelist = tuple(Retry.RETRY_AFTER_STATUS_CODES) + (504, )

    # configure standard session

    # configure retry handler
    retry = Retry(total=self.num_retries, backoff_factor=self.backoff_factor, redirect=0,
                  status_forcelist=self.status_forcelist, raise_on_status=False)
    # init transport adapter
    adapter = HTTPAdapter(pool_connections=self.pool_size, pool_maxsize=self.pool_size, max_retries=retry)

    # init session
    session = requests.Session()

    # set default headers
    session.headers.update(self.user_agent)

    session.mount('http://', adapter)
    session.mount('https://', adapter)
    self._session = session

    # configure SSE session (differs from our standard session)

    sse_retry = Retry(total=1000000, redirect=0, status_forcelist=self.status_forcelist)
    sse_adapter = HTTPAdapter(pool_connections=self.pool_size, pool_maxsize=self.pool_size,
                              max_retries=sse_retry)
    sse_session = requests.Session()
    sse_session.headers.update(self.user_agent)
    sse_session.mount('http://', sse_adapter)
    sse_session.mount('https://', sse_adapter)
    self._sse_session = sse_session
'''
Bulk release script for Dataverse. Very useful if you've
just imported a bunch of Dryad studies.
'''
import argparse
import time

import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

# Retry policy for HTTP requests: up to 10 attempts with backoff on
# throttling and server-side errors, for the listed methods.
try:
    RETRY_STRATEGY = Retry(
        total=10,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=['HEAD', 'GET', 'OPTIONS', 'POST', 'PUT'],
        backoff_factor=1)
except TypeError:
    # urllib3 < 1.26 only knows the keyword under its old name.
    RETRY_STRATEGY = Retry(
        total=10,
        status_forcelist=[429, 500, 502, 503, 504],
        method_whitelist=['HEAD', 'GET', 'OPTIONS', 'POST', 'PUT'],
        backoff_factor=1)

VERSION = (0, 1, 0)
__version__ = '.'.join([str(x) for x in VERSION])

def argp():
    '''
    Parses the arguments from the command line.

    Returns argparse.ArgumentParser
    '''
    description = (
        'Bulk file releaser for unpublished Dataverse files. Either releases individual '
        'studies or all unreleased files in a single dataverse.')
# (tail of a definition whose beginning lies outside this view)
return connectedNodes

# Credentials are read through `config` and sent as request headers.
TOKEN = config('token')
NUMBER = config('number')
headers = {'gomoney': f'{NUMBER}', 'Authorization': 'Bearer {0}'.format(TOKEN)}
seen = set()

""" Retry strategy. Kicks off after getting a response with status in `status_forcelist` instead of dieing down. """
# NOTE(review): urllib3 1.26 renamed `method_whitelist` to `allowed_methods`
# and urllib3 2.0 removed the old name -- confirm the pinned urllib3 version.
retry_strategy = Retry(
    total=1000,  # Increase.
    status_forcelist=[429, 500, 502, 503, 504],
    method_whitelist=["GET"],
    backoff_factor=2)
adapter = HTTPAdapter(max_retries=retry_strategy)
http = requests.Session()
# Only https:// URLs go through the retrying adapter.
http.mount("https://", adapter)

# Miscellaneous pool worker file. Actually, isn't this a repetition? Wellll.. Idk.
def worker(item):
    """Run `FindTreasure` on one item and print a message on a connection error."""
    # NOTE(review): `ConnectionError` is the builtin unless the file imports
    # another class under that name -- confirm which one is intended.
    try:
        FindTreasure(item)
    except ConnectionError:
        print("Lobatan")
def get_http_session_with_retry(
        total=0,
        connect=None,
        read=None,
        redirect=None,
        status=None,
        status_forcelist=None,
        backoff_factor=0,
        backoff_max=None,
        pool_connections=None,
        pool_maxsize=None):
    """Create a ``requests`` session with retrying, pooled adapters on both schemes.

    The retry counters must each be ``None`` or an int, and a non-empty
    ``status_forcelist`` must contain ints only; otherwise ``ValueError`` is
    raised. Pool sizes fall back to the ``api.http.pool_maxsize`` and
    ``api.http.pool_connections`` configuration values (default 512). A given
    ``backoff_max`` is written to ``Retry.BACKOFF_MAX``, i.e. on the class.
    Certificate verification follows ``ENV_HOST_VERIFY_CERT`` or the
    ``api.verify_certificate`` configuration value. While it is disabled, a
    warning is logged at most twice per process and urllib3's
    ``InsecureRequestWarning`` is silenced.

    :return: the configured session.
    """
    global __disable_certificate_verification_warning

    retry_counts = (total, connect, read, redirect, status)
    if any(not isinstance(count, (int, type(None))) for count in retry_counts):
        raise ValueError('Bad configuration. All retry count values must be null or int')

    if status_forcelist and any(not isinstance(code, int) for code in status_forcelist):
        raise ValueError('Bad configuration. Retry status_forcelist must be null or list of ints')

    if pool_maxsize is None:
        pool_maxsize = get_config().get('api.http.pool_maxsize', 512)
    if pool_connections is None:
        pool_connections = get_config().get('api.http.pool_connections', 512)

    session = requests.Session()

    # HACK: with python 2.7 there is a potential race condition that can cause
    # a deadlock when importing "netrc", inside the get_netrc_auth() function
    # setting 'session.trust_env' to False will make sure the `get_netrc_auth` is not called
    # see details: https://github.com/psf/requests/issues/2925
    if six.PY2:
        session.trust_env = False

    if backoff_max is not None:
        Retry.BACKOFF_MAX = backoff_max

    retry_policy = Retry(
        total=total, connect=connect, read=read, redirect=redirect, status=status,
        status_forcelist=status_forcelist, backoff_factor=backoff_factor)
    pooled_adapter = TLSv1HTTPAdapter(
        max_retries=retry_policy, pool_connections=pool_connections, pool_maxsize=pool_maxsize)
    for scheme in ('http://', 'https://'):
        session.mount(scheme, pooled_adapter)

    # update verify host certificate
    verify_default = get_config().get('api.verify_certificate', True)
    session.verify = ENV_HOST_VERIFY_CERT.get(default=verify_default)

    if not session.verify and __disable_certificate_verification_warning < 2:
        # show warning
        __disable_certificate_verification_warning += 1
        logging.getLogger('trains').warning(
            msg='InsecureRequestWarning: Certificate verification is disabled! Adding '
                'certificate verification is strongly advised. See: '
                'https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings')
        # silence urllib3's own warning; ours has already been logged
        import urllib3
        # noinspection PyBroadException
        try:
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        except Exception:
            pass

    return session
# (tail of a definition whose beginning lies outside this view: the
# instance's own timeout is used when `timeout` is None)
if timeout is None:
    kwargs["timeout"] = self.timeout
return super().send(request, **kwargs)

# Retry frequently to prevent I/O locking up for long
# replace backoff function in urllib3 Retry
# NOTE: this patches the Retry class itself, so every Retry instance in the
# process gets its backoff capped at config.REQUEST_BACKOFF_MAX.
orig_backoff_fun = Retry.get_backoff_time

def custom_backoff_time(self):
    # Cap the backoff computed by the original method.
    return min(config.REQUEST_BACKOFF_MAX, orig_backoff_fun(self))

Retry.get_backoff_time = custom_backoff_time

# NOTE(review): urllib3 1.26 renamed `method_whitelist` to `allowed_methods`
# and urllib3 2.0 removed the old name -- confirm the pinned urllib3 version.
retries = Retry(
    total=config.REQUEST_RETRY_COUNT,
    status_forcelist=[429],
    method_whitelist=['GET', 'POST'],
    backoff_factor=config.REQUEST_BACKOFF_FACTOR,
)

# Module-level session, created once at import time and shared by all callers.
http = sessions.BaseUrlSession(base_url=config.METASERVER)
adapter = TimeoutHTTPAdapter(max_retries=retries)
http.mount("http://", adapter)
http.mount("https://", adapter)

def get_requests_session():
    # Returns the shared module-level session.
    return http

def get_headers():
    return {
from google.cloud.bigquery import Table
from google.cloud.bigquery import TimePartitioning
from google.cloud.bigquery import TimePartitioningType
from google.cloud.bigquery import RangePartitioning
from google.cloud.bigquery import PartitionRange

@dataclass
class PartitionTimeFilter:
    """Optional time bounds: an exact time and/or a lower and upper bound."""
    # All three fields default to None, i.e. "not set".
    exact_time : datetime = None
    lower_bound_datetime : datetime = None
    upper_bound_datetime : datetime = None

TIME_ZONE = "Europe/Paris"
TIME_ZONE_FMT = "%Y-%m-%d %H:%M:%S %Z%z"
TIMEOUT = 10
# NOTE(review): which `Retry` class this is depends on an import outside this view.
RETRY = Retry(backoff_factor=3)
MAX_RETRIES = 10

def run():
    # Cleaning is enabled by default; the BQ_CLEANING environment variable
    # overrides it and only the value "true" (case-insensitive) keeps it on.
    CLEANING = True
    LOCATION = "US"
    try:
        env = os.getenv("BQ_CLEANING")
        if env is not None:
            CLEANING = True if env.lower() == "true" else False
    # NOTE(review): os.getenv returns None for a missing variable instead of
    # raising, so this KeyError handler looks unreachable -- confirm.
    except KeyError:
        logging.error("BQ_CLEANING system env not found.")
        pass
    except:
        raise
    print(f"BQ_CLEANING : {CLEANING}")