Exemplo n.º 1
0
 def _requests_retry_session(
     retries=5,
     backoff_factor=7,
     status_forcelist=(408, 500, 502, 503, 504),
     session=None,
 ):
     """
     Build (or extend) a ``requests`` session that retries failed requests.

     :param retries: value used for the total, read and connect retry budgets.
     :param backoff_factor: backoff factor handed to ``Retry``.
     :param status_forcelist: HTTP status codes that trigger a retry. The
         default is an immutable tuple so it cannot be shared and mutated
         across calls.
     :param session: existing session to configure; a new ``requests.Session``
         is created when this is falsy.
     :return: the session, with the retrying adapter mounted for ``http://``
         and ``https://``.
     """
     session = session or requests.Session()
     retry = Retry(
         total=retries,
         read=retries,
         connect=retries,
         backoff_factor=backoff_factor,
         status_forcelist=status_forcelist,
     )
     adapter = HTTPAdapter(max_retries=retry)
     session.mount('http://', adapter)
     session.mount('https://', adapter)
     return session
Exemplo n.º 2
0
def requests_retry_session(retries=10,
                           backoff_factor=3,
                           status_forcelist=(500, 502, 504, 429),
                           session=None):
    """Return a ``requests`` session whose HTTP(S) adapters retry failures.

    Pattern taken from:
    https://dev.to/ssbozy/python-requests-with-retries-4p03
    https://www.peterbe.com/plog/best-practice-with-retries-with-requests

    ``retries`` is used for the total, read, connect and status budgets of
    the ``Retry`` policy. A falsy ``session`` is replaced by a fresh
    ``requests.Session``; otherwise the given session is configured in place
    and returned.
    """
    if not session:
        session = requests.Session()

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        status=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    # One shared adapter serves both schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session
Exemplo n.º 3
0
def _get_http_response_with_retries(method, url, max_retries, backoff_factor,
                                    retry_codes, **kwargs):
    """
    Performs an HTTP request using Python's `requests` module with an automatic retry policy.

    :param method: a string indicating the method to use, e.g. "GET", "POST", "PUT".
    :param url: the target URL address for the HTTP request.
    :param max_retries: Maximum total number of retries.
    :param backoff_factor: a time factor for exponential backoff. e.g. value 5 means the HTTP
      request will be retried with interval 5, 10, 20... seconds. A value of 0 turns off the
      exponential backoff.
    :param retry_codes: a list of HTTP response error codes that qualifies for retry.
    :param kwargs: Additional keyword arguments to pass to `requests.Session.request()`

    :return: requests.Response object.
    """
    assert 0 <= max_retries < 10
    assert 0 <= backoff_factor < 120

    retry_kwargs = {
        "total": max_retries,
        "connect": max_retries,
        "read": max_retries,
        "redirect": max_retries,
        "status": max_retries,
        "status_forcelist": retry_codes,
        "backoff_factor": backoff_factor,
    }
    if Version(urllib3.__version__) >= Version("1.26.0"):
        retry_kwargs["allowed_methods"] = None
    else:
        retry_kwargs["method_whitelist"] = None

    retry = Retry(**retry_kwargs)
    adapter = HTTPAdapter(max_retries=retry)
    with requests.Session() as http:
        http.mount("https://", adapter)
        http.mount("http://", adapter)
        response = http.request(method, url, **kwargs)
        return response
Exemplo n.º 4
0
def load(endpoint):
    """Fetch every page of the listing at *endpoint* and save id/URL pairs as CSV.

    The first response's ``Link`` header is scanned for the ``rel="last"``
    page number; when the header or that relation is absent the listing is
    treated as a single page. Each page is expected to be a JSON list of
    items carrying ``dcterms:identifier`` and ``@id``.

    Rows whose ``id`` repeats are written to ``data-out/duplicated-omeka.csv``;
    the de-duplicated table is written to the path in the ``OMEKA``
    environment variable.

    :param endpoint: URL of the paginated listing.
    :return: the de-duplicated ``pandas.DataFrame`` with columns ``id`` and
        ``omeka_url``.
    """
    retry_options = {
        "total": 3,
        "status_forcelist": [429, 500, 502, 503, 504],
    }
    retry_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        retry_strategy = Retry(allowed_methods=retry_methods, **retry_options)
    except TypeError:
        # urllib3 < 1.26 only knows the older keyword name.
        retry_strategy = Retry(method_whitelist=retry_methods, **retry_options)
    adapter = HTTPAdapter(max_retries=retry_strategy)

    identifiers = []
    urls = []
    # The context manager releases pooled connections even if a page fails.
    with requests.Session() as http:
        http.mount("https://", adapter)
        http.mount("http://", adapter)

        response = http.get(endpoint)

        # find out number of pages (single page when no "last" link is given)
        last_links = re.findall(r'\d+(?=>; rel="last)',
                                response.headers.get("Link", ""))
        last_page = int(last_links[0]) if last_links else 1

        # loop over pages
        for page in tqdm(range(1, last_page + 1)):
            items = http.get(endpoint, params={"page": page}).json()
            for item in items:
                identifiers.append(item["dcterms:identifier"][0]["@value"])
                urls.append(item["@id"])
            sleep(0.5)  # pause between page requests

    # create dataframes
    omeka_df = pd.DataFrame({"id": identifiers, "omeka_url": urls})
    omeka_duplicated = omeka_df[omeka_df.duplicated(subset="id")]
    if len(omeka_duplicated) > 0:
        omeka_duplicated.to_csv("data-out/duplicated-omeka.csv")
    omeka_df.drop_duplicates(subset="id", inplace=True)
    omeka_df.to_csv(os.environ["OMEKA"])
    return omeka_df
Exemplo n.º 5
0
    def __init__(self,
                 input,
                 query,
                 log,
                 callback=None,
                 rampage_type=None,
                 conf=None,
                 **kwargs):
        """Initialise the Qt object/thread and prepare its HTTP session.

        Stores the given arguments on the instance, loads the configuration
        (``SciHubConf('SciHubEVA.conf')`` when *conf* is falsy), builds a
        ``requests`` session whose headers and retry count come from the
        ``network`` section of that configuration, then applies the HTTP
        proxy via ``self._set_http_proxy()``.

        ``captcha_answer`` may be passed as a keyword argument; it is stored
        on ``self._captcha_answer`` only when present.
        """
        QObject.__init__(self)
        threading.Thread.__init__(self)

        self._input, self._query = input, query
        self.log = log
        self._callback, self._rampage_type = callback, rampage_type

        # Captcha answer, used only when rampage_type == SciHubRampageType.PDF_CAPTCHA_RESPONSE
        try:
            self._captcha_answer = kwargs['captcha_answer']
        except KeyError:
            pass

        self._conf = conf if conf else SciHubConf('SciHubEVA.conf')

        self._sess = requests.Session()
        session_header = self._conf.get('network', 'session_header')
        self._sess.headers = json.loads(session_header)

        attempts = self._conf.getint('network', 'retry_times')
        retrying_adapter = HTTPAdapter(
            max_retries=Retry(total=attempts, read=attempts, connect=attempts))
        for scheme in ('http://', 'https://'):
            self._sess.mount(scheme, retrying_adapter)

        self._set_http_proxy()

        self._doi_pattern = r'\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'])\S)+)\b'
        self._illegal_filename_pattern = r'[\/\\\:\*\?\"\<\>\|]'
def diff_response(args: Tuple[str, str]):
    """Compare the ``/cpes/:vendor/:product`` answers of the two local servers.

    The same path is requested from ``127.0.0.1:1325`` (old) and
    ``127.0.0.1:1326`` (new). When the decoded JSON bodies differ, a warning
    is logged and both bodies are dumped to
    ``integration/diff/cpes/<vendor>#<product>.old`` / ``.new``.

    Both requests go through the session that carries the retry policy, so
    the configured retries actually apply. A read timeout is logged and the
    comparison is skipped; connection errors and any other failure terminate
    the process with exit status 1.

    :param args: ``(vendor, product)`` pair used to build the request path.
    """
    # Endpoint
    # /cpes/:vendor/:product
    path = f'cpes/{args[0]}/{args[1]}'
    context = pprint.pformat({"args": args, "path": path}, indent=2)

    retries = Retry(total=5, backoff_factor=1, status_forcelist=[503, 504])
    with requests.Session() as session:
        session.mount("http://", HTTPAdapter(max_retries=retries))

        try:
            response_old = session.get(f'http://127.0.0.1:1325/{path}',
                                       timeout=(2.0, 30.0)).json()
            response_new = session.get(f'http://127.0.0.1:1326/{path}',
                                       timeout=(2.0, 30.0)).json()
        except requests.exceptions.ConnectionError as e:
            logger.error(f'Failed to Connection..., err: {e}, {context}')
            exit(1)
        except requests.exceptions.ReadTimeout as e:
            logger.warning(f'Failed to Read Response..., err: {e}, {context}')
            # Without both responses there is nothing to compare.
            return
        except Exception as e:
            logger.error(f'Failed to GET request..., err: {e}, {context}')
            exit(1)

    diff = DeepDiff(response_old, response_new, ignore_order=True)
    if diff != {}:
        logger.warning(
            f'There is a difference between old and new(or RDB and Redis):\n {context}'
        )

        diff_path = f'integration/diff/cpes/{args[0]}#{args[1]}'
        with open(f'{diff_path}.old', 'w') as w:
            w.write(json.dumps(response_old, indent=4))
        with open(f'{diff_path}.new', 'w') as w:
            w.write(json.dumps(response_new, indent=4))
Exemplo n.º 7
0
    def __init__(self, url='', headers=None, proxy=None, debug=False):
        """Store the connection settings and open a retrying HTTPS session.

        :param url: API URL, kept on ``self.url``.
        :param headers: optional dict merged into the session headers; any
            non-dict value is ignored.
        :param proxy: proxy settings, kept on ``self.proxies``.
        :param debug: when truthy, print the API URL and switch the root and
            ``requests.packages.urllib3`` loggers to DEBUG.
        """
        self.url = url
        # Import Proxy Server settings
        self.proxies = proxy
        self.debug = debug

        # Start the session
        session = requests.Session()
        self.sessions = session
        if isinstance(headers, dict):
            session.headers.update(headers)

        retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        session.mount('https://', HTTPAdapter(max_retries=retry_policy))

        # Debugging
        if not self.debug:
            return
        print('API URL: %s' % self.url)
        logging.basicConfig()
        logging.getLogger().setLevel(logging.DEBUG)
        urllib3_logger = logging.getLogger("requests.packages.urllib3")
        urllib3_logger.setLevel(logging.DEBUG)
        urllib3_logger.propagate = True
Exemplo n.º 8
0
 def download(self):
     """Download the ``.tif`` tile of every feature that is not on disk yet.

     Tiles are stored as ``<cwd>/downloaded/<pulau>/DEMNAS_<nomor_peta>_v1.0.tif``.
     The tile number is read from ``NAMOBJ`` for ``nusa_tenggara`` and from
     ``NOMOR_PETA`` otherwise.

     A response body smaller than 5000 bytes is treated as a failed download:
     nothing is written (so a later run retries that tile instead of skipping
     a truncated file) and the failure message is returned immediately.

     :return: the failure message, or a summary line with the feature count.
     """
     features = self._data['features']
     folder = os.getcwd()+'/downloaded/'+self._pulau
     adapter_mounted = False
     for feature in features:
         properties = feature['properties']
         if self._pulau == 'nusa_tenggara':
             nomor_peta = properties['NAMOBJ']
         else:
             nomor_peta = properties['NOMOR_PETA']
         download_url = self._download_url+nomor_peta+'_v1.0.tif'
         target = folder+'/'+'DEMNAS_'+nomor_peta+'_v1.0.tif'
         print(download_url)
         os.makedirs(folder, exist_ok=True)
         if os.path.exists(target):
             continue
         if not adapter_mounted:
             # Mount the retrying adapter once instead of once per tile.
             retry = Retry(connect=3, backoff_factor=1)
             self._session.mount('http://', HTTPAdapter(max_retries=retry))
             adapter_mounted = True
         response = self._session.get(download_url, cookies={"PHPSESSID": self._cookies["PHPSESSID"]})
         content = response.content
         # Check the payload itself: the size of a still-open, unflushed file
         # is not reliable.
         if len(content) < 5000:
             return 'Gagal Download. Cek login email dan password'
         with open(target, "wb") as file:
             file.write(content)
     return '============ TOTAL ('+str(len(features))+') Finish! ============'
Exemplo n.º 9
0
def _requests_retry_session(
        retries=3,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
        session=None,
):
    """
    Return a session for calling an API that copes with some kinds of
    failure by retrying.

    ``retries`` feeds the total, read and connect budgets. When ``session``
    is falsy a new ``requests.Session`` is created; otherwise the given
    session is configured and returned.
    """
    http_session = session if session else requests.Session()
    retry_settings = {
        'total': retries,
        'read': retries,
        'connect': retries,
        'backoff_factor': backoff_factor,
        'status_forcelist': status_forcelist,
    }
    retrying_adapter = HTTPAdapter(max_retries=Retry(**retry_settings))
    http_session.mount('http://', retrying_adapter)
    http_session.mount('https://', retrying_adapter)
    return http_session
def requestsRetrySession(
        retries=3,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
        session=None,
):
    """Mount a retrying adapter on *session* (or on a new one) and return it.

    ``retries`` is used for the total, read and connect retry budgets;
    ``status_forcelist`` lists the status codes that trigger a retry.
    """
    if not session:
        session = requests.Session()

    policy = Retry(total=retries,
                   read=retries,
                   connect=retries,
                   backoff_factor=backoff_factor,
                   status_forcelist=status_forcelist)
    shared_adapter = HTTPAdapter(max_retries=policy)

    for prefix in ('http://', 'https://'):
        session.mount(prefix, shared_adapter)

    return session
Exemplo n.º 11
0
 def mdx_verify_server(self) -> bool:
     """Validate the README by POSTing it to the local MDX server.

     Starts the server through ``ReadMeValidator.start_mdx_server`` when no
     server process is recorded yet, and gives up with ``False`` if it could
     not be started. The content returned by ``self.fix_mdx()`` is sent to
     ``http://localhost:6161`` with up to two retries and a 20 second timeout.

     :return: ``False`` when the server could not be started, or when the
         response status is not 200 and ``self.handle_error`` returns a truthy
         value; ``True`` otherwise.
     """
     if not ReadMeValidator._MDX_SERVER_PROCESS:
         server_started = ReadMeValidator.start_mdx_server(
             handle_error=self.handle_error, file_path=str(self.file_path))
         if not server_started:
             return False
     readme_content = self.fix_mdx()
     adapter = HTTPAdapter(max_retries=Retry(total=2))
     # Close the session (and its pooled connection) once the body is read.
     with requests.Session() as session:
         session.mount('http://', adapter)
         response = session.request('POST',
                                    'http://localhost:6161',
                                    data=readme_content.encode('utf-8'),
                                    timeout=20)
     if response.status_code != 200:
         error_message, error_code = Errors.readme_error(response.text)
         if self.handle_error(error_message,
                              error_code,
                              file_path=self.file_path):
             return False
     return True
Exemplo n.º 12
0
def requests_session(
        retries=3,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
        session=None,
):
    """Return a session that retries GET/POST/PUT/DELETE/HEAD/OPTIONS requests.

    :param retries: value used for the total, read and connect retry budgets.
    :param backoff_factor: backoff factor handed to ``Retry``.
    :param status_forcelist: status codes that trigger a retry.
    :param session: session to configure; a new one is created when falsy.
    :return: the configured session.
    """
    session = session or requests.Session()
    retry_options = {
        'total': retries,
        'read': retries,
        'connect': retries,
        'backoff_factor': backoff_factor,
        'status_forcelist': status_forcelist,
    }
    # By default urllib3 does not enable automatic retries for every method,
    # so the methods to retry have to be listed explicitly.
    retry_methods = frozenset(
        ['GET', 'POST', 'PUT', 'DELETE', 'HEAD', 'OPTIONS'])
    try:
        retry = Retry(allowed_methods=retry_methods, **retry_options)
    except TypeError:
        # urllib3 < 1.26 only knows the older keyword name.
        retry = Retry(method_whitelist=retry_methods, **retry_options)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    return session
Exemplo n.º 13
0
def requests_retry_session(retries=3,
                           backoff_factor=0.3,
                           status_forcelist=(500, 502, 504),
                           session=None):
    """
        Return a session prepared with a retry object.
        This implementation was obtained in https://www.peterbe.com/plog/best-practice-with-retries-with-requests

        ``retries`` sets the total, read and connect budgets. A falsy
        ``session`` is replaced by a new ``requests.Session``. The function
        itself performs no logging.
    """
    if not session:
        session = requests.Session()

    retry_policy = Retry(total=retries,
                         read=retries,
                         connect=retries,
                         backoff_factor=backoff_factor,
                         status_forcelist=status_forcelist)
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session
Exemplo n.º 14
0
    def test_retries(self) -> None:
        """Check the ``max_retries.total`` configured on both mounted adapters."""
        # Responses doesn't support testing the low-level retry
        # functionality, so we can't test the retry itself easily. :(
        # https://github.com/getsentry/responses/issues/135

        # Each case: (session factory, expected retry total on both adapters).
        cases = [
            # Defaults to no retries
            (requests.Session, 0),
            (lambda: OutgoingSession(role="testing", timeout=1), 0),
            (lambda: OutgoingSession(role="testing", timeout=1, max_retries=2), 2),
            (lambda: OutgoingSession(role="testing",
                                     timeout=1,
                                     max_retries=Retry(total=5)), 5),
        ]
        for make_session, expected_total in cases:
            session = make_session()
            for prefix in ("http://", "https://"):
                self.assertEqual(session.adapters[prefix].max_retries.total,
                                 expected_total)
Exemplo n.º 15
0
    def test_request_response(self):
        """POST through a retrying session, then GET the mock server and check success."""
        url = 'http://localhost:{port}/users'.format(
            port=self.mock_server_port)

        # Implement retry mechanism
        session = requests.Session()
        retry_options = dict(total=5, backoff_factor=1, status_forcelist=[400])
        try:
            retry_strategy = Retry(allowed_methods=['POST'], **retry_options)
        except TypeError:
            # urllib3 < 1.26 only knows the older keyword name.
            retry_strategy = Retry(method_whitelist=['POST'], **retry_options)

        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        response = session.post(url=url)

        # Normal version
        # Send a request to the mock API server and store the response.
        response = requests.get(url)

        # Confirm that the request-response cycle completed successfully.
        print(f"response: {response}")
        assert_true(response.ok)
Exemplo n.º 16
0
def get_tags_list(url: str, query: str, pages_count: int,
                  retries_count: int) -> List[str]:
    """
    Collect topic tags from the first `pages_count` result pages of `url`.

    Each page is requested with `query` as a repository search sorted by
    stars; the text of every matching topic-tag link is stripped and
    appended to the result.
    :param url: URL for `query`
    :param query: Query for repos search
    :param pages_count: Number of result pages to scan
    :param retries_count: total retries allowed; status 429 triggers a retry
    :return: List with tags, in page order
    """
    collected: List[str] = []
    retry_policy = Retry(total=retries_count,
                         backoff_factor=2,
                         raise_on_status=True,
                         status_forcelist=[429])
    tag_attrs = {'class': 'topic-tag topic-tag-link f6 px-2 mx-0'}
    with requests.Session() as session:
        session.mount(prefix='https://',
                      adapter=HTTPAdapter(max_retries=retry_policy))
        for page_num in range(1, pages_count + 1):
            logger.debug(f'Page №{page_num} request')
            page = session.get(url=url,
                               params={
                                   'p': page_num,
                                   'q': query,
                                   'type': 'Repositories',
                                   's': 'stars'
                               })
            logger.debug(f'Page №{page_num} processing')
            soup = BeautifulSoup(page.content, 'lxml')
            collected.extend(
                link.text.strip()
                for link in soup.find_all('a', attrs=tag_attrs))
            logger.debug(
                f'Tags count after page №{page_num} processing: {len(collected)}'
            )
    return collected
Exemplo n.º 17
0
def requests_retry_session(retries=3,
                           backoff_factor=0.3,
                           status_forcelist=(500, 502, 504)):
    """Create a new session that backs off and retries automatically.
    :param retries: Number of times to retry a request (used for the total,
        read and connect budgets)
    :type retries: int
    :param backoff_factor: Relates to the amount of time to wait between
        requests: {backoff factor} * (2 ^ ({number of total retries} - 1))
    :type backoff_factor: float
    :param status_forcelist: Status codes that prompt a retry
    :type status_forcelist: tuple of ints
    """
    retrying_adapter = HTTPAdapter(max_retries=Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    ))
    new_session = requests.Session()
    for scheme in ('http://', 'https://'):
        new_session.mount(scheme, retrying_adapter)
    return new_session
Exemplo n.º 18
0
def query_portals(context):
    """
    Query Cumulus Portals for all published items

    The endpoint is read from ``context.solid_config``. Two POST requests are
    made (start indexes "0" and "55000", up to 55000 records each) and the
    normalized ``items`` of each JSON answer are concatenated.

    :param context: object exposing the endpoint URL as ``solid_config``.
    :return: one ``pandas.DataFrame`` holding the rows of both requests.
    """
    endpoint = context.solid_config

    retry_options = {
        "total": 3,
        "status_forcelist": [429, 500, 502, 503, 504],
    }
    retry_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        retry_strategy = Retry(allowed_methods=retry_methods, **retry_options)
    except TypeError:
        # urllib3 < 1.26 only knows the older keyword name.
        retry_strategy = Retry(method_whitelist=retry_methods, **retry_options)
    adapter = HTTPAdapter(max_retries=retry_strategy)

    API_STEPS = ["0", "55000"]

    frames = []
    # The context manager closes pooled connections when the loop is done.
    with requests.Session() as http:
        http.mount("https://", adapter)
        http.mount("http://", adapter)

        for i in tqdm(API_STEPS, desc="Steps"):

            payload = {
                "table": "AssetRecords",
                "quicksearchstring": "jpg",
                "maxreturned": "55000",
                "startindex": i,
            }

            params = urllib.parse.urlencode(payload, quote_via=urllib.parse.quote)
            response = http.post(endpoint, params=params)
            data = response.json()
            frames.append(pd.json_normalize(data["items"]))

    # DataFrame.append was removed in pandas 2.0; concat gives the same rows.
    return pd.concat(frames, ignore_index=True)
Exemplo n.º 19
0
    def send_session(self, prepared, stream=None):
        """Send a prepared request through the shared, lazily created session.

        On first use a ``requests.Session`` is created and stored on
        ``self.session`` with a retrying adapter mounted for both schemes;
        its pool sizes are the sum of the values in
        ``self._thread_pools_names``. Every call increments
        ``self.calls_counter`` under ``threadLock``.

        :param prepared: the prepared request to send.
        :param stream: forwarded to ``Session.send``.
        :return: the response returned by ``Session.send``.
        """
        if self.session is None:
            self.session = requests.Session()
            retry_options = dict(
                total=5,
                read=5,
                connect=5,
                backoff_factor=0.3,
                # force retry on those status responses
                status_forcelist=(501, 502, 503, 504, 505, 506, 507, 508, 510, 511),
                raise_on_status=False,
            )
            # use on any request type: a falsy method filter disables filtering
            try:
                retry = Retry(allowed_methods=None, **retry_options)
            except TypeError:
                # urllib3 < 1.26 only knows the older keyword name.
                retry = Retry(method_whitelist=False, **retry_options)
            pool_size = np.sum(list(self._thread_pools_names.values()))
            adapter = HTTPAdapter(max_retries=retry,
                                  pool_maxsize=pool_size,
                                  pool_connections=pool_size)
            self.session.mount('http://', adapter)
            self.session.mount('https://', adapter)
        resp = self.session.send(request=prepared, stream=stream, verify=self.verify, timeout=None)

        with threadLock:
            self.calls_counter.add()
        return resp
Exemplo n.º 20
0
    def __init__(self, opts: Optional[Options] = None):
        """Create the session and keep a copy of the options.

        When *opts* is ``None`` the package-level ``omnipath.options`` is
        used. A retrying adapter is mounted for both schemes only when
        ``num_retries`` is positive.

        :raises TypeError: if *opts* is not an ``Options`` instance.
        """
        if opts is None:
            from omnipath import options as opts

        if not isinstance(opts, Options):
            raise TypeError(
                f"Expected `opts` to be of type `Options`, found {type(opts).__name__}."
            )

        self._session = Session()
        self._options = copy(opts)  # this does not copy MemoryCache

        retry_count = self._options.num_retries
        if retry_count > 0:
            retry_policy = Retry(
                total=retry_count,
                redirect=5,
                allowed_methods=["HEAD", "GET", "OPTIONS"],
                status_forcelist=[413, 429, 500, 502, 503, 504],
                backoff_factor=1,
            )
            retrying_adapter = HTTPAdapter(max_retries=retry_policy)
            for scheme in ("http://", "https://"):
                self._session.mount(scheme, retrying_adapter)

        logging.debug(f"Initialized `{self}`")
Exemplo n.º 21
0
    def __init__(self, *, api_key: Optional[str]):
        """Set up the root URL, API-key parameter, default headers and retries.

        The root URL comes from the environment variable named by
        ``ROOT_URL_ENV_VARIABLE`` and falls back to ``DEFAULT_ROOT_URL``.

        :param api_key: when not ``None``, sent as the ``key`` query parameter.
        """
        super().__init__()

        self.root_url = os.getenv(self.ROOT_URL_ENV_VARIABLE, self.DEFAULT_ROOT_URL)

        # Add the API key query parameter
        if api_key is not None:
            key_param = {'key': api_key}
            self.params.update(key_param)  # type: ignore [union-attr]

        # Set the default headers
        # Add the package name + version and the system info to the user-agent header
        package_name = __name__.split(".")[0]
        default_headers = {
            'Accept': 'application/json',
            'User-Agent': f'{package_name}/{__version__} ({platform.version()})',
        }
        self.headers.update(default_headers)

        # Configure retries: one adapter instance per scheme, same policy
        retries = Retry(total=None, connect=5, backoff_factor=0.2)
        for scheme in ('https://', 'http://'):
            self.mount(scheme, requests.adapters.HTTPAdapter(max_retries=retries))
Exemplo n.º 22
0
def http_download_binary_file(request_url,
                              file_path,
                              auth=None,
                              headers=None,
                              verify_peer_certificate=True,
                              proxies=None):
    """ Requests a HTTP url to save a file on the local filesystem.

    The body is streamed to *file_path*; the response and the session are
    always closed afterwards, whether the download succeeded or not.

    :param request_url: Requested HTTP URL.
    :param file_path: Local file path.
    :param auth: (optional) Auth tuple to use HTTP Auth (supported: Basic/Digest/Custom).
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param verify_peer_certificate: (optional) Flag to decide whether peer certificate has to be validated.
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :raises: ConfluenceException in the case of the server does not answer with HTTP code 200.
    """
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    with requests.Session() as session:
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        response = session.get(request_url,
                               stream=True,
                               auth=auth,
                               headers=headers,
                               verify=verify_peer_certificate,
                               proxies=proxies)
        try:
            if 200 != response.status_code:
                raise ConfluenceException(
                    'Error %s: %s on requesting %s' %
                    (response.status_code, response.reason, request_url))
            with open(file_path, 'wb') as downloaded_file:
                # Let urllib3 undo any content encoding while copying.
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, downloaded_file)
        finally:
            # A streamed response holds its connection until it is closed.
            response.close()
Exemplo n.º 23
0
 def reset_session(self):
     """Replace ``self.session`` with a new session that retries HTTPS requests.

     The retry policy is also kept on ``self.retries``.
     """
     fresh_session = requests.Session()
     self.session = fresh_session
     retry_policy = Retry(
         total=3,  # number of retries
         backoff_factor=1,  # time by which the wait is extended when several retries occur
         status_forcelist=[500],  # status codes that trigger a retry
     )
     self.retries = retry_policy
     fresh_session.mount("https://", HTTPAdapter(max_retries=retry_policy))
Exemplo n.º 24
0
def get_request_session(max_retries=5):
	"""Return a ``requests`` session that retries on HTTP 500 responses.

	:param max_retries: total number of retries for each mounted adapter.
		Previously this argument was ignored and 5 was always used; the
		default is now 5 so callers that pass nothing behave as before,
		while an explicit value is honoured.
	:return: session with separate retrying adapters for http and https.
	"""
	from urllib3.util import Retry
	session = requests.Session()
	for prefix in ("http://", "https://"):
		retry = Retry(total=max_retries, status_forcelist=[500])
		session.mount(prefix, requests.adapters.HTTPAdapter(max_retries=retry))
	return session
Exemplo n.º 25
0
    def __init__(self,
                 horizon_uri=None,
                 pool_size=DEFAULT_POOLSIZE,
                 num_retries=DEFAULT_NUM_RETRIES,
                 request_timeout=DEFAULT_REQUEST_TIMEOUT,
                 backoff_factor=DEFAULT_BACKOFF_FACTOR,
                 user_agent=None):
        """The :class:`Horizon` object, which represents the interface for
        making requests to a Horizon server instance.

        This class aims to be up to date with Horizon's API endpoints; however,
        you can utilize the internal session via ``self.session`` (which is a
        :class:`requests.Session` object) to make arbitrary requests to
        a Horizon instance's API.

        In general, on HTTP errors (non 2XX/3XX responses), no exception is
        raised, and the return dictionary must be checked to see if it is an
        error or a valid response. Any other errors however are raised by this
        class.

        Two sessions are prepared: the standard one (``self._session``), which
        uses ``num_retries`` and ``backoff_factor``, and a separate one for SSE
        (``self._sse_session``) with a retry total of 1000000. Both follow no
        redirects on retry and send the ``user_agent`` headers.

        :param str horizon_uri: The horizon base URL; defaults to ``HORIZON_TEST``.
        :param int request_timeout: The timeout for all requests.
        :param int pool_size: persistent connection to Horizon and connection pool
        :param int num_retries: configurable request retry functionality
        :param float backoff_factor: a backoff factor to apply between attempts after the second try
        :param dict user_agent: representing the user-agent you want,
            such as `{'X-Client-Name': 'py-stellar-base', 'X-Client-Version': __version__}`;
            defaults to ``USER_AGENT``.

        """
        # Always set the attribute: it is read below when the default headers
        # are applied, also when the caller supplied a custom user agent.
        if user_agent is None:
            user_agent = USER_AGENT
        self.user_agent = user_agent

        if horizon_uri is None:
            self.horizon_uri = HORIZON_TEST
        else:
            self.horizon_uri = horizon_uri

        self.pool_size = pool_size
        self.num_retries = num_retries
        self.request_timeout = request_timeout
        self.backoff_factor = backoff_factor

        # adding 504 to the tuple of statuses to retry
        self.status_forcelist = tuple(Retry.RETRY_AFTER_STATUS_CODES) + (504, )

        # configure standard session

        # configure retry handler
        retry = Retry(total=self.num_retries,
                      backoff_factor=self.backoff_factor,
                      redirect=0,
                      status_forcelist=self.status_forcelist,
                      raise_on_status=False)
        # init transport adapter
        adapter = HTTPAdapter(pool_connections=self.pool_size,
                              pool_maxsize=self.pool_size,
                              max_retries=retry)

        # init session
        session = requests.Session()

        # set default headers
        session.headers.update(self.user_agent)

        session.mount('http://', adapter)
        session.mount('https://', adapter)
        self._session = session

        # configure SSE session (differs from our standard session)

        sse_retry = Retry(total=1000000,
                          redirect=0,
                          status_forcelist=self.status_forcelist)
        sse_adapter = HTTPAdapter(pool_connections=self.pool_size,
                                  pool_maxsize=self.pool_size,
                                  max_retries=sse_retry)
        sse_session = requests.Session()
        sse_session.headers.update(self.user_agent)
        sse_session.mount('http://', sse_adapter)
        sse_session.mount('https://', sse_adapter)
        self._sse_session = sse_session
'''
Bulk release script for Dataverse.

Very useful if you've just imported a bunch of Dryad studies.
'''

import argparse
import time
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

# Shared retry policy: up to 10 retries with backoff for the listed status
# codes and methods. The keyword naming the methods was renamed in urllib3
# 1.26 and the old name was removed in 2.0, so try the new name first.
try:
    RETRY_STRATEGY = Retry(
        total=10,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=['HEAD', 'GET', 'OPTIONS', 'POST', 'PUT'],
        backoff_factor=1)
except TypeError:
    # urllib3 < 1.26 only knows the older keyword name.
    RETRY_STRATEGY = Retry(
        total=10,
        status_forcelist=[429, 500, 502, 503, 504],
        method_whitelist=['HEAD', 'GET', 'OPTIONS', 'POST', 'PUT'],
        backoff_factor=1)

# Version as a tuple, and the same version as a dotted string.
VERSION = (0, 1, 0)
__version__ = '.'.join(map(str, VERSION))


def argp():
    '''
    Parses the arguments from the command line.

    Returns arparse.ArgumentParser
    '''
    description = (
        'Bulk file releaser for unpublished Dataverse files. Either releases individual '
        'studies or all unreleased files in a single dataverse.')
Exemplo n.º 27
0
        return connectedNodes


# Credentials read through `config`.
TOKEN = config('token')
NUMBER = config('number')

# Default request headers built from the two configured values.
headers = {
    'gomoney': f'{NUMBER}',
    'Authorization': f'Bearer {TOKEN}',
}

# Module-level set, initially empty.
seen = set()
# Retry strategy.
# Kicks in after getting a response with a status in `status_forcelist`
# instead of dying down.
# The keyword naming the retried methods was renamed in urllib3 1.26 and the
# old name was removed in 2.0, so try the new name first.
try:
    retry_strategy = Retry(
        total=1000,  # Increase.
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
        backoff_factor=2)
except TypeError:
    # urllib3 < 1.26 only knows the older keyword name.
    retry_strategy = Retry(
        total=1000,  # Increase.
        status_forcelist=[429, 500, 502, 503, 504],
        method_whitelist=["GET"],
        backoff_factor=2)

adapter = HTTPAdapter(max_retries=retry_strategy)
http = requests.Session()
http.mount("https://", adapter)


# Miscellaneous pool worker file. Actually, isn't this a repetition? Wellll.. Idk.
def worker(item):
    """Run ``FindTreasure`` on *item*; print "Lobatan" if ``ConnectionError`` is raised."""
    try:
        FindTreasure(item)
    except ConnectionError:
        # The error is reported on stdout only; nothing is re-raised.
        print("Lobatan")
Exemplo n.º 28
0
def get_http_session_with_retry(
        total=0,
        connect=None,
        read=None,
        redirect=None,
        status=None,
        status_forcelist=None,
        backoff_factor=0,
        backoff_max=None,
        pool_connections=None,
        pool_maxsize=None):
    """Build a ``requests`` session with a retry policy and sized connection pools.

    The retry counters must each be ``None`` or an int, and every entry of a
    non-empty ``status_forcelist`` must be an int; otherwise ``ValueError``
    is raised. Pool sizes that are ``None`` are read from the configuration
    (``api.http.pool_maxsize`` / ``api.http.pool_connections``, default 512).
    A non-``None`` ``backoff_max`` is written to ``Retry.BACKOFF_MAX``, which
    is a class attribute and therefore affects every ``Retry`` object.

    Certificate verification follows ``ENV_HOST_VERIFY_CERT`` with the
    ``api.verify_certificate`` configuration as default; when it is disabled
    a warning is logged (at most twice per process) and urllib3's
    ``InsecureRequestWarning`` is silenced.

    :return: the configured session.
    :raises ValueError: on a badly typed retry count or status code.
    """
    global __disable_certificate_verification_warning

    for retry_count in (total, connect, read, redirect, status):
        if not isinstance(retry_count, (int, type(None))):
            raise ValueError('Bad configuration. All retry count values must be null or int')

    if status_forcelist:
        for status_code in status_forcelist:
            if not isinstance(status_code, int):
                raise ValueError('Bad configuration. Retry status_forcelist must be null or list of ints')

    if pool_maxsize is None:
        pool_maxsize = get_config().get('api.http.pool_maxsize', 512)

    if pool_connections is None:
        pool_connections = get_config().get('api.http.pool_connections', 512)

    session = requests.Session()

    # HACK: with python 2.7 there is a potential race condition that can cause
    # a deadlock when importing "netrc", inside the get_netrc_auth() function
    # setting 'session.trust_env' to False will make sure the `get_netrc_auth` is not called
    # see details: https://github.com/psf/requests/issues/2925
    if six.PY2:
        session.trust_env = False

    if backoff_max is not None:
        Retry.BACKOFF_MAX = backoff_max

    retry_policy = Retry(
        total=total,
        connect=connect,
        read=read,
        redirect=redirect,
        status=status,
        status_forcelist=status_forcelist,
        backoff_factor=backoff_factor,
    )
    adapter = TLSv1HTTPAdapter(
        max_retries=retry_policy,
        pool_connections=pool_connections,
        pool_maxsize=pool_maxsize,
    )
    for scheme in ('http://', 'https://'):
        session.mount(scheme, adapter)

    # update verify host certificate
    verify_default = get_config().get('api.verify_certificate', True)
    session.verify = ENV_HOST_VERIFY_CERT.get(default=verify_default)
    if session.verify or __disable_certificate_verification_warning >= 2:
        return session

    # show warning
    __disable_certificate_verification_warning += 1
    logging.getLogger('trains').warning(
        msg='InsecureRequestWarning: Certificate verification is disabled! Adding '
            'certificate verification is strongly advised. See: '
            'https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings')
    # from now on keep urllib3 from emitting its own warning
    import urllib3
    # noinspection PyBroadException
    try:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    except Exception:
        pass
    return session
Exemplo n.º 29
0
        if timeout is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)


# Retry frequently to prevent I/O locking up for long
# replace backoff function in urllib3 Retry
orig_backoff_fun = Retry.get_backoff_time


def custom_backoff_time(self):
    """Return the original backoff time, capped at ``config.REQUEST_BACKOFF_MAX``."""
    uncapped = orig_backoff_fun(self)
    return min(config.REQUEST_BACKOFF_MAX, uncapped)


# Install the capped version on the class, so it applies to every Retry object.
Retry.get_backoff_time = custom_backoff_time


# Retry policy for the shared session: only status 429 forces a retry, for
# GET and POST. The keyword naming the methods was renamed in urllib3 1.26
# and the old name was removed in 2.0, so try the new name first.
try:
    retries = Retry(
        total=config.REQUEST_RETRY_COUNT,
        status_forcelist=[429],
        allowed_methods=['GET', 'POST'],
        backoff_factor=config.REQUEST_BACKOFF_FACTOR,
    )
except TypeError:
    # urllib3 < 1.26 only knows the older keyword name.
    retries = Retry(
        total=config.REQUEST_RETRY_COUNT,
        status_forcelist=[429],
        method_whitelist=['GET', 'POST'],
        backoff_factor=config.REQUEST_BACKOFF_FACTOR,
    )

http = sessions.BaseUrlSession(base_url=config.METASERVER)
adapter = TimeoutHTTPAdapter(max_retries=retries)
http.mount("http://", adapter)
http.mount("https://", adapter)


def get_requests_session():
    """Return the module-level ``http`` session object."""
    return http


def get_headers():
    return {
Exemplo n.º 30
0
from google.cloud.bigquery import Table
from google.cloud.bigquery import TimePartitioning
from google.cloud.bigquery import TimePartitioningType
from google.cloud.bigquery import RangePartitioning
from google.cloud.bigquery import PartitionRange

@dataclass
class PartitionTimeFilter:
    """Optional time bounds for a partition filter; every field defaults to ``None``."""

    # A single exact time.
    exact_time: datetime = None
    # Lower and upper time bounds.
    lower_bound_datetime: datetime = None
    upper_bound_datetime: datetime = None

# Time zone name and a date/time format string; their use is outside this excerpt.
TIME_ZONE = "Europe/Paris"
TIME_ZONE_FMT = "%Y-%m-%d %H:%M:%S %Z%z"
# NOTE(review): the unit of TIMEOUT is not visible here — presumably seconds; confirm at the call site.
TIMEOUT = 10
# NOTE(review): `Retry` is imported outside this excerpt — confirm which library's Retry accepts `backoff_factor`.
RETRY = Retry(backoff_factor=3)
MAX_RETRIES = 10

def run():
    CLEANING = True
    LOCATION = "US"
    try:
        env = os.getenv("BQ_CLEANING")
        if env is not None:
            CLEANING = True if env.lower() == "true" else False
    except KeyError:
        logging.error("BQ_CLEANING system env not found.")
        pass
    except:
        raise
    print(f"BQ_CLEANING : {CLEANING}")