def _defaultFetcher(url):
    """Retrieve data from ``url``.

    css_parser default implementation of fetch URL function.

    Returns ``(encoding, string)`` on success, or ``None`` when the URL
    could not be opened (all fetch errors are logged, not raised here;
    ``log.warn(..., error=...)`` may re-raise depending on log settings).
    """
    try:
        request = urllib_Request(url)
        request.add_header('User-agent',
                           'css_parser %s (http://www.cthedot.de/css_parser/)' % VERSION)
        res = urllib_urlopen(request)
    except urllib_HTTPError as e:
        # http error, e.g. 404, e can be raised
        log.warn('HTTPError opening url=%s: %s %s' % (url, e.code, e.msg), error=e)
    except urllib_URLError as e:
        # URLError like mailto: or other IO errors, e can be raised
        log.warn('URLError, %s' % e.reason, error=e)
    except OSError as e:
        # e.g if file URL and not found
        # NOTE(review): passes the exception *class* as ``error`` while the
        # handlers above pass the instance ``e`` -- presumably intentional
        # (controls what the project logger re-raises); confirm against the
        # css_parser log implementation before changing.
        log.warn(e, error=OSError)
    except ValueError as e:
        # invalid url, e.g. "1"
        log.warn('ValueError, %s' % e.args[0], error=ValueError)
    else:
        if res:
            # inspect HTTP headers for mime type and declared encoding
            mimeType, encoding = encutils.getHTTPInfo(res)
            if mimeType != 'text/css':
                log.error('Expected "text/css" mime type for url=%r but found: %r' % (url, mimeType),
                          error=ValueError)
            content = res.read()
            if hasattr(res, 'close'):
                res.close()
            return encoding, content
    # any failure path above falls through and returns None implicitly
def AuthorizeTokens(client_id, client_secret, authorization_code):
    """Exchange an authorization code for OAuth access and refresh tokens.

    Implements the application half of the "OAuth2 for Installed
    Applications" flow described at
    https://developers.google.com/accounts/docs/OAuth2InstalledApp#handlingtheresponse

    Args:
        client_id: Client ID obtained by registering your app.
        client_secret: Client secret obtained by registering your app.
        authorization_code: code generated by Google Accounts after user
            grants permission.

    Returns:
        The decoded response from the Google Accounts server, as a dict.
        Expected fields include 'access_token', 'expires_in', and
        'refresh_token'.
    """
    payload = {
        'client_id': client_id,
        'client_secret': client_secret,
        'code': authorization_code,
        'redirect_uri': REDIRECT_URI,
        'grant_type': 'authorization_code',
    }
    token_endpoint = AccountsUrl('o/oauth2/token')
    body = urllib_urlencode(payload).encode("utf-8")
    raw = urllib_urlopen(token_endpoint, body).read()
    return json.loads(raw.decode('utf-8'))
def _doRequest(self, url):
    """Perform an HTTP GET for *url*.

    Returns ``(url, rawcontent)``; *url* may differ from the argument if
    the server redirected. Returns ``(None, None)`` on HTTP errors.
    """
    self._log.debug(' CSSCapture._doRequest\n * URL: %s' % url)

    request = urllib_Request(url)
    if self._ua:
        request.add_header('User-agent', self._ua)
        self._log.info(' * Using User-Agent: %s', self._ua)

    try:
        response = urllib_urlopen(request)
    except urllib_HTTPError as e:
        self._log.critical(' %s\n%s %s\n%s' % (
            e.geturl(), e.code, e.msg, e.headers))
        return None, None

    # follow up on any server-side redirect: report the URL actually fetched
    final_url = response.geturl()
    if final_url != url:
        url = final_url
        self._log.info(' URL retrieved: %s', url)

    return url, response
def urlopen(cls, url, *args, **kwargs):
    """
    Wraps 'urlopen' provided by 'urllib' to set own user agent.
    """
    headers = {'User-Agent': 'MeerkatMon (https://github.com/lpirl/meerkatmon)'}
    request = Request(url, None, headers)
    return urllib_urlopen(request, *args, **kwargs)
def send(self, request, stream=False, timeout=None, verify=False, cert=None, proxies=None):
    """Fetch ``request.url`` over FTP and adapt it to a ``requests.Response``.

    Only GET is supported. The FTP payload is read eagerly (``stream`` is
    ignored), and any URL-level failure is reported as ``FTPError``.

    Args:
        request: prepared request whose ``url`` uses the ftp scheme.
        stream/verify/cert/proxies: accepted for interface compatibility,
            unused for FTP.
        timeout: passed through to ``urlopen``.

    Returns:
        A faked ``requests.Response`` with status 200 and the file content.

    Raises:
        HTTPMethodUnavailableError: for any method other than GET.
        FTPError: when the FTP transfer fails.
    """
    if request.method != 'GET':
        raise HTTPMethodUnavailableError(
            "FTP requests do not support method '%s'" % request.method,
            request.method)
    try:
        real_resp = urllib_urlopen(request.url, timeout=timeout)
        try:
            content = real_resp.read()
        finally:
            # fix: close the underlying FTP connection instead of leaking it
            real_resp.close()
        # we're going to fake a requests.Response with this
        resp = requests.Response()
        resp.status_code = 200  # urlopen raised no error, so report success
        resp.url = request.url
        resp.headers = {}
        resp._content = content
        resp.raw = _dummy
        return resp
    except URLError:
        raise FTPError(request.url)
def RefreshToken(client_id, client_secret, refresh_token):
    """Obtain a fresh access token from a previously-issued refresh token.

    See
    https://developers.google.com/accounts/docs/OAuth2InstalledApp#refresh

    Args:
        client_id: Client ID obtained by registering your app.
        client_secret: Client secret obtained by registering your app.
        refresh_token: A previously-obtained refresh token.

    Returns:
        The decoded response from the Google Accounts server, as a dict.
        Expected fields include 'access_token', 'expires_in', and
        'refresh_token'.
    """
    payload = {
        'client_id': client_id,
        'client_secret': client_secret,
        'refresh_token': refresh_token,
        'grant_type': 'refresh_token',
    }
    token_endpoint = AccountsUrl('o/oauth2/token')
    body = urllib_urlencode(payload).encode("utf-8")
    raw = urllib_urlopen(token_endpoint, body).read()
    return json.loads(raw.decode('utf-8'))
def _defaultFetcher(url):
    """Retrieve data from ``url``.

    css_parser default implementation of fetch URL function.

    Returns ``(encoding, string)`` or ``None``
    """
    try:
        req = urllib_Request(url)
        req.add_header(
            'User-agent',
            'css_parser %s (http://www.cthedot.de/css_parser/)' % VERSION)
        res = urllib_urlopen(req)
    except urllib_HTTPError as e:
        # HTTP-level failure such as 404; the logger may re-raise e
        log.warn('HTTPError opening url=%s: %s %s' % (url, e.code, e.msg),
                 error=e)
    except urllib_URLError as e:
        # scheme/IO failures (mailto:, unreachable host, ...)
        log.warn('URLError, %s' % e.reason, error=e)
    except OSError as e:
        # e.g. a file:// URL pointing at a missing file
        log.warn(e, error=OSError)
    except ValueError as e:
        # malformed url such as "1"
        log.warn('ValueError, %s' % e.args[0], error=ValueError)
    else:
        if not res:
            return None
        mimeType, encoding = encutils.getHTTPInfo(res)
        if mimeType != 'text/css':
            log.error(
                'Expected "text/css" mime type for url=%r but found: %r' % (
                    url, mimeType), error=ValueError)
        content = res.read()
        if hasattr(res, 'close'):
            res.close()
        return encoding, content
def urlopen(url):
    """Open *url*, always routing it through a ``Request`` object."""
    request = Request(url)
    return urllib_urlopen(request)
def get_data(url, forceFetch=False, decrypt=False, useCache=True):
    """Fetch and JSON-decode *url*, with ETag-aware local caching.

    Args:
        url: URL to fetch; falsy values are returned unchanged.
        forceFetch: bypass the cached payload and always hit the network.
        decrypt: XXTEA-decrypt the payload before caching and decoding.
        useCache: when False, behaves like ``forceFetch=True``.

    Returns:
        The decoded JSON object (or *url* itself when *url* is falsy).
    """
    if not url:
        return url
    start = datetime.datetime.now()
    tag = ''
    data = ''
    forceFetch = forceFetch or not useCache

    def _elapsed():
        # wall-clock ms since this call began, for the debug log lines
        return str(int((datetime.datetime.now() - start).total_seconds() * 1000))

    cache = common_cache.get(url)
    if cache:
        try:
            tag = cache.get('tag')
            data = cache.get('data')
        except Exception:  # fix: was a bare `except:` (also caught SystemExit)
            # legacy cache entries stored the raw payload, not a dict
            data = cache
    if data and not forceFetch:
        log('getData Cache (' + _elapsed() + 'ms) ' + str(url), 'Debug')
        return json.loads(data)

    new_headers = {}
    if tag != '':
        # revalidate our cached copy instead of re-downloading
        new_headers.update({'If-None-Match': tag})
    new_headers.update({'User-Agent': 'okhttp/3.10.0'})
    new_headers.update({'Accept-Encoding': 'gzip'})
    try:
        request = urllib_urlopen(urllib_Request(url, headers=new_headers))
    except urllib_HTTPError as e:
        if e.code == 304:
            # server confirms the cached copy (matched ETag) is current
            log('getData 304 (' + _elapsed() + 'ms) ' + str(url), 'Debug')
            return json.loads(data)
        failure = str(e)
        if hasattr(e, 'code') or hasattr(e, 'reason'):
            log('get_data ERROR: ' + url + ' / ' + failure)
        log('getData RequestErr (' + _elapsed() + 'ms) ' + str(url), 'Debug')
        # NOTE(review): when nothing was cached, data is '' and json.loads('')
        # raises ValueError here -- confirm callers expect that failure mode.
        return json.loads(data)

    if request.info().get('Content-Encoding') == 'gzip':
        compressed = StringIO(request.read())  # renamed: `buffer` shadowed a builtin
        data = gzip.GzipFile(fileobj=compressed).read()
    else:
        data = request.read()

    # if ETag is set, cache far longer and rely on If-None-Match revalidation
    exp = datetime.timedelta(minutes=_cacheMinutes)
    if request.info().get('ETag'):
        tag = request.info().get('ETag')
        exp = datetime.timedelta(days=200)
    if decrypt:
        data = decryptBase64StringToStringss(data, _xxtea_key)
    common_cache.set(url, {'data': data, 'tag': tag}, expiration=exp)
    log('getData (' + _elapsed() + 'ms) ' + str(url), 'Debug')
    return json.loads(data)