def http_get(self, url, headers=None, timeout=None, cache_enabled=True):
    """Perform an HTTP GET on ``url``, consulting the response cache first.

    The cache is used only when both ``app.config["CACHE_ENABLED"]`` and
    the ``cache_enabled`` argument are truthy.

    Returns the live ``requests`` response, or, on a cache hit, a
    stripped-down stand-in object exposing only ``status_code`` and
    ``text`` (headers and other response details are not stored).

    Raises ProviderTimeout when the request times out and
    ProviderHttpError for any other ``requests`` request exception; the
    underlying exception is passed as the second argument of each.
    """
    from totalimpact.api import app

    # The cache is consulted only if the config switch and the argument
    # both allow it.
    use_cache = app.config["CACHE_ENABLED"] and cache_enabled

    if use_cache:
        cache = Cache(self.max_cache_duration)
        cached_entry = cache.get_cache_entry(url)
        if cached_entry:
            # Minimal equivalent of requests.models.Response: only the
            # status code and the body text were stored.
            class CachedResponse:
                pass

            cached_response = CachedResponse()
            cached_response.status_code = cached_entry['status_code']
            cached_response.text = cached_entry['text']
            return cached_response

    # Default to an empty header set when the caller supplied none.
    if headers is None:
        headers = {}

    # Route both schemes through the configured proxy, if there is one.
    proxy = app.config["PROXY"]
    if proxy:
        proxies = {'http' : proxy, 'https' : proxy}
    else:
        proxies = None

    # make the request
    try:
        response = requests.get(url,
                                headers=headers,
                                timeout=timeout,
                                proxies=proxies)
    except requests.exceptions.Timeout as e:
        timeout_msg = "Attempt to connect to provider timed out during GET on " + url
        logger.debug(timeout_msg)
        raise ProviderTimeout(timeout_msg, e)
    except requests.exceptions.RequestException as e:
        error_msg = "RequestException during GET on: " + url
        logger.info(error_msg)
        raise ProviderHttpError(error_msg, e)

    # cache the response and return
    if use_cache:
        cache.set_cache_entry(
            url,
            {'text' : response.text, 'status_code' : response.status_code})
    return response
def http_get(self, url, headers=None, timeout=20, cache_enabled=True, allow_redirects=False):
    """Perform an HTTP GET on ``url``, consulting the response cache first.

    The cache is used only when both ``app.config["CACHE_ENABLED"]`` and
    the ``cache_enabled`` argument are truthy. The cache key is built from
    the caller-supplied headers plus the url and the ``allow_redirects``
    flag.

    Returns the live ``requests`` response, or, on a cache hit with a
    stored status of 200, a stripped-down stand-in object exposing only
    ``status_code``, ``url`` and ``text``. Cached entries with any other
    status are ignored and the url is fetched again.

    The caller's ``headers`` dict is never modified; the User-Agent is
    added to a private copy.

    Raises ProviderTimeout when the request times out and
    ProviderHttpError for any other ``requests`` request exception; the
    underlying exception is passed as the second argument of each.
    """
    from totalimpact import app

    # The cache is consulted only if the config switch and the argument
    # both allow it.
    use_cache = app.config["CACHE_ENABLED"] and cache_enabled

    # Key on the caller's headers (before the User-Agent is added) plus
    # the url and redirect policy.
    cache_key = dict(headers) if headers else {}
    cache_key.update({"url": url, "allow_redirects": allow_redirects})

    if use_cache:
        c = Cache(self.max_cache_duration)
        cache_data = c.get_cache_entry(cache_key)
        # use it if it was a 200, otherwise go get it again
        if cache_data and cache_data['status_code'] == 200:
            # Stripped down equivalent of requests.models.Response.
            # Headers and other information are not stored.
            class CachedResponse:
                pass

            r = CachedResponse()
            r.status_code = cache_data['status_code']
            r.url = cache_data['url']
            r.text = cache_data['text']
            self.logger.debug(u"returning from cache: %s" % (url))
            return r

    # Work on a copy so the caller's dict is not mutated when the
    # User-Agent is injected.
    request_headers = dict(headers) if headers else {}
    request_headers["User-Agent"] = app.config["USER_AGENT"]

    analytics.track("CORE", "Sent GET to Provider", {
        "provider": self.provider_name,
        "url": url
    }, context={"providers": {'Mixpanel': False}})

    # Route both schemes through the configured proxy, if there is one.
    proxies = None
    if app.config["PROXY"]:
        proxies = {
            'http': app.config["PROXY"],
            'https': app.config["PROXY"]
        }

    try:
        self.logger.debug(u"/biblio_print LIVE {url}".format(url=url))
    except UnicodeDecodeError:
        # The url could not be formatted into a unicode string directly;
        # decode it explicitly as UTF-8 for logging.
        self.logger.debug(
            u"%s fyi: needing force url to unicode to print" % (self.provider_name))
        self.logger.debug(u"/biblio_print LIVE {url}".format(
            url=unicode(url, "utf-8")))

    # make the request
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept as-is to preserve behaviour; confirm this is still intended.
        r = requests.get(url,
                         headers=request_headers,
                         timeout=timeout,
                         proxies=proxies,
                         allow_redirects=allow_redirects,
                         verify=False)
    except requests.exceptions.Timeout as e:
        analytics.track("CORE", "Received no response from Provider (timeout)", {
            "provider": self.provider_name,
            "url": url
        })
        self.logger.info(u"%s Provider timed out during GET on %s" % (self.provider_name, url))
        raise ProviderTimeout("Provider timed out during GET on " + url, e)
    except requests.exceptions.RequestException as e:
        analytics.track("CORE", "Received RequestException from Provider", {
            "provider": self.provider_name,
            "url": url
        })
        self.logger.info(u"%s RequestException during GET on %s" % (self.provider_name, url))
        raise ProviderHttpError("RequestException during GET on: " + url, e)

    # Fall back to UTF-8 when the response declares no encoding.
    if not r.encoding:
        r.encoding = "utf-8"

    # cache the response and return
    # NOTE(review): the truthiness of a requests Response presumably
    # follows r.ok, so error responses would not be cached -- confirm
    # against the installed requests version.
    if r and use_cache:
        cache_data = {
            'text': r.text,
            'status_code': r.status_code,
            'url': r.url
        }
        c.set_cache_entry(cache_key, cache_data)
    return r
def http_get(self, url, headers=None, timeout=20, cache_enabled=True, allow_redirects=False):
    """Perform an HTTP GET on ``url``, consulting the response cache first.

    The cache is used only when both ``app.config["CACHE_ENABLED"]`` and
    the ``cache_enabled`` argument are truthy. The cache key is built from
    the caller-supplied headers plus the url and the ``allow_redirects``
    flag.

    Returns the live ``requests`` response, or, on a cache hit with a
    stored status of 200, a stripped-down stand-in object exposing only
    ``status_code``, ``url`` and ``text``. Cached entries with any other
    status are ignored and the url is fetched again.

    The caller's ``headers`` dict is never modified; the User-Agent is
    added to a private copy.

    Raises ProviderTimeout when the request times out and
    ProviderHttpError for any other ``requests`` request exception; the
    underlying exception is passed as the second argument of each.
    """
    from totalimpact import app

    # The cache is consulted only if the config switch and the argument
    # both allow it.
    use_cache = app.config["CACHE_ENABLED"] and cache_enabled

    # Key on the caller's headers (before the User-Agent is added) plus
    # the url and redirect policy.
    cache_key = dict(headers) if headers else {}
    cache_key.update({"url":url, "allow_redirects":allow_redirects})

    if use_cache:
        c = Cache(self.max_cache_duration)
        cache_data = c.get_cache_entry(cache_key)
        # use it if it was a 200, otherwise go get it again
        if cache_data and cache_data['status_code'] == 200:
            # Stripped down equivalent of requests.models.Response.
            # Headers and other information are not stored.
            class CachedResponse:
                pass

            r = CachedResponse()
            r.status_code = cache_data['status_code']
            r.url = cache_data['url']
            r.text = cache_data['text']
            self.logger.debug("returning from cache: %s" %(url))
            return r

    # Work on a copy so the caller's dict is not mutated when the
    # User-Agent is injected.
    request_headers = dict(headers) if headers else {}
    request_headers["User-Agent"] = app.config["USER_AGENT"]

    # Route both schemes through the configured proxy, if there is one.
    proxies = None
    if app.config["PROXY"]:
        proxies = {'http' : app.config["PROXY"], 'https' : app.config["PROXY"]}

    self.logger.debug("LIVE %s" %(url))

    # make the request
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept as-is to preserve behaviour; confirm this is still intended.
        r = requests.get(url,
                         headers=request_headers,
                         timeout=timeout,
                         proxies=proxies,
                         allow_redirects=allow_redirects,
                         verify=False)
    except requests.exceptions.Timeout as e:
        self.logger.info("%s Attempt to connect to provider timed out during GET on %s" %(self.provider_name, url))
        raise ProviderTimeout("Attempt to connect to provider timed out during GET on " + url, e)
    except requests.exceptions.RequestException as e:
        raise ProviderHttpError("RequestException during GET on: " + url, e)

    # Fall back to UTF-8 when the response declares no encoding.
    if not r.encoding:
        r.encoding = "utf-8"

    # cache the response and return
    # NOTE(review): the truthiness of a requests Response presumably
    # follows r.ok, so error responses would not be cached -- confirm
    # against the installed requests version.
    if r and use_cache:
        cache_data = {'text' : r.text,
                      'status_code' : r.status_code,
                      'url': r.url}
        c.set_cache_entry(cache_key, cache_data)
    return r