Example #1
0
File: _fetch.py  Project: Sigil-Ebook/Sigil
def _defaultFetcher(url):
    """Fetch the resource at ``url``.

    This is css_parser's default implementation of the fetch-URL
    function.

    Returns an ``(encoding, content)`` tuple on success, or ``None``
    when the request failed (the failure is logged, not raised).
    """
    try:
        req = urllib_Request(url)
        agent = 'css_parser %s (http://www.cthedot.de/css_parser/)' % VERSION
        req.add_header('User-agent', agent)
        response = urllib_urlopen(req)
    except urllib_HTTPError as e:
        # HTTP-level failure such as 404; the log may re-raise `e`
        log.warn('HTTPError opening url=%s: %s %s' % (url, e.code, e.msg),
                 error=e)
    except urllib_URLError as e:
        # URLError: unsupported scheme (mailto:) or other IO trouble
        log.warn('URLError, %s' % e.reason, error=e)
    except OSError as e:
        # e.g. a file URL whose target does not exist
        log.warn(e, error=OSError)
    except ValueError as e:
        # malformed URL such as "1"
        log.warn('ValueError, %s' % e.args[0], error=ValueError)
    else:
        if not response:
            return None
        mimeType, encoding = encutils.getHTTPInfo(response)
        if mimeType != 'text/css':
            log.error('Expected "text/css" mime type for url=%r but found: %r' %
                      (url, mimeType), error=ValueError)
        content = response.read()
        if hasattr(response, 'close'):
            response.close()
        return encoding, content
Example #2
0
def AuthorizeTokens(client_id, client_secret, authorization_code):
    """Exchange an authorization code for OAuth access/refresh tokens.

    Implements the application half of the "OAuth2 for Installed
    Applications" flow described at
    https://developers.google.com/accounts/docs/OAuth2InstalledApp#handlingtheresponse

    Args:
        client_id: Client ID obtained by registering your app.
        client_secret: Client secret obtained by registering your app.
        authorization_code: code generated by Google Accounts after the
            user grants permission.

    Returns:
        The decoded Google Accounts server response as a dict; expected
        keys include 'access_token', 'expires_in', and 'refresh_token'.
    """
    params = {
        'client_id': client_id,
        'client_secret': client_secret,
        'code': authorization_code,
        'redirect_uri': REDIRECT_URI,
        'grant_type': 'authorization_code',
    }
    token_url = AccountsUrl('o/oauth2/token')
    body = urllib_urlencode(params).encode("utf-8")
    raw = urllib_urlopen(token_url, body).read()
    return json.loads(raw.decode('utf-8'))
Example #3
0
    def _doRequest(self, url):
        """Perform an HTTP request for *url*.

        Returns ``(url, response)``; the returned URL may differ from
        the input when the server redirected. On HTTPError both
        elements are ``None``.
        """
        self._log.debug('    CSSCapture._doRequest\n        * URL: %s' % url)

        request = urllib_Request(url)
        if self._ua:
            request.add_header('User-agent', self._ua)
            self._log.info('        * Using User-Agent: %s', self._ua)

        try:
            response = urllib_urlopen(request)
        except urllib_HTTPError as e:
            self._log.critical('    %s\n%s %s\n%s' % (
                e.geturl(), e.code, e.msg, e.headers))
            return None, None

        # Track the effective URL after any server-side redirects.
        final_url = response.geturl()
        if final_url != url:
            url = final_url
            self._log.info('        URL retrieved: %s', url)

        return url, response
Example #4
0
	def urlopen(cls, url, *args, **kwargs):
		"""
		Delegate to urllib's 'urlopen', injecting our own User-Agent header.
		"""
		headers = {'User-Agent': 'MeerkatMon (https://github.com/lpirl/meerkatmon)'}
		request = Request(url, None, headers)
		return urllib_urlopen(request, *args, **kwargs)
Example #5
0
File: __init__.py  Project: azban/scrapelib
 def send(self, request, stream=False, timeout=None, verify=False, cert=None, proxies=None):
     """Serve *request* over FTP and fake a ``requests.Response``.

     Only GET is supported; any other verb raises
     HTTPMethodUnavailableError. A failed connection raises FTPError.
     """
     if request.method != 'GET':
         raise HTTPMethodUnavailableError("FTP requests do not support method '%s'" %
                                          request.method, request.method)
     try:
         raw = urllib_urlopen(request.url, timeout=timeout)
         # Wrap the FTP payload in a synthetic requests.Response.
         fake = requests.Response()
         fake.status_code = 200
         fake.url = request.url
         fake.headers = {}
         fake._content = raw.read()
         fake.raw = _dummy
         return fake
     except URLError:
         raise FTPError(request.url)
Example #6
0
def RefreshToken(client_id, client_secret, refresh_token):
    """Exchange a refresh token for a fresh access token.

    See https://developers.google.com/accounts/docs/OAuth2InstalledApp#refresh

    Args:
        client_id: Client ID obtained by registering your app.
        client_secret: Client secret obtained by registering your app.
        refresh_token: A previously-obtained refresh token.

    Returns:
        The decoded Google Accounts server response as a dict; expected
        keys include 'access_token', 'expires_in', and 'refresh_token'.
    """
    params = {
        'client_id': client_id,
        'client_secret': client_secret,
        'refresh_token': refresh_token,
        'grant_type': 'refresh_token',
    }
    token_url = AccountsUrl('o/oauth2/token')
    body = urllib_urlencode(params).encode("utf-8")
    raw = urllib_urlopen(token_url, body).read()
    return json.loads(raw.decode('utf-8'))
Example #7
0
File: _fetch.py  Project: wcq062821/Sigil
def _defaultFetcher(url):
    """Default css_parser implementation of the fetch-URL function.

    Retrieves ``url`` and returns ``(encoding, content)``, or ``None``
    when the fetch fails (the failure is logged instead of raised).
    """
    try:
        req = urllib_Request(url)
        agent = 'css_parser %s (http://www.cthedot.de/css_parser/)' % VERSION
        req.add_header('User-agent', agent)
        res = urllib_urlopen(req)
    except urllib_HTTPError as e:
        # HTTP failure such as 404; the log may re-raise `e`
        log.warn('HTTPError opening url=%s: %s %s' % (url, e.code, e.msg),
                 error=e)
    except urllib_URLError as e:
        # e.g. an unsupported scheme (mailto:) or other IO error
        log.warn('URLError, %s' % e.reason, error=e)
    except OSError as e:
        # e.g. a file URL whose target does not exist
        log.warn(e, error=OSError)
    except ValueError as e:
        # invalid URL such as "1"
        log.warn('ValueError, %s' % e.args[0], error=ValueError)
    else:
        if res:
            mimeType, encoding = encutils.getHTTPInfo(res)
            if mimeType != 'text/css':
                log.error('Expected "text/css" mime type for url=%r '
                          'but found: %r' % (url, mimeType),
                          error=ValueError)
            body = res.read()
            if hasattr(res, 'close'):
                res.close()
            return encoding, body
Example #8
0
 def urlopen(url):
     """Open *url* through urllib, wrapping it in a Request object first."""
     req = Request(url)
     return urllib_urlopen(req)
Example #9
0
def get_data(url, forceFetch=False, decrypt=False, useCache=True):
    """Fetch JSON data from *url*, with ETag-aware caching.

    Args:
        url: URL to fetch; falsy values are returned unchanged.
        forceFetch: when True, do not serve cached data directly (the
            cached ETag is still sent so the server may answer 304).
        decrypt: when True, run the payload through
            decryptBase64StringToStringss before caching and parsing.
        useCache: when False, behaves as if forceFetch were True.

    Returns:
        The parsed JSON payload (possibly served from cache), or *url*
        itself when it is falsy.
    """
    if not url:
        return url

    start = datetime.datetime.now()

    def _elapsed_ms():
        # Milliseconds since this call started, used in timing log lines.
        return str(int((datetime.datetime.now() - start).total_seconds() * 1000))

    tag = ''
    data = ''
    forceFetch = forceFetch or not useCache

    cache = common_cache.get(url)
    if cache:
        try:
            tag = cache.get('tag')
            data = cache.get('data')
        except AttributeError:
            # Legacy cache entries stored the raw payload itself rather
            # than a {'tag': ..., 'data': ...} dict; fall back to it.
            # (Was a bare `except:`, which also swallowed e.g.
            # KeyboardInterrupt.)
            data = cache

        if data and not forceFetch:
            log('getData Cache (' + _elapsed_ms() + 'ms) ' + str(url), 'Debug')
            return json.loads(data)

    new_headers = {}
    if tag != '':
        # Let the server answer 304 Not Modified when nothing changed.
        new_headers.update({'If-None-Match': tag})
    new_headers.update({'User-Agent': 'okhttp/3.10.0'})
    new_headers.update({'Accept-Encoding': 'gzip'})

    try:
        request = urllib_urlopen(urllib_Request(url, headers=new_headers))
    except urllib_HTTPError as e:
        if e.code == 304:
            # Not modified: the cached copy is still valid.
            log('getData 304 (' + _elapsed_ms() + 'ms) ' + str(url), 'Debug')
            return json.loads(data)
        failure = str(e)
        if hasattr(e, 'code') or hasattr(e, 'reason'):
            log('get_data ERROR: ' + url + ' / ' + failure)

        # Fall back to whatever cached data we have (may be '' and then
        # json.loads will raise — existing behavior, kept as-is).
        log('getData RequestErr (' + _elapsed_ms() + 'ms) ' + str(url), 'Debug')
        return json.loads(data)

    if request.info().get('Content-Encoding') == 'gzip':
        # NOTE(review): under Python 3 this needs io.BytesIO — StringIO
        # will not accept the bytes from request.read(); confirm the
        # target runtime / the module's StringIO alias.
        buffer = StringIO(request.read())
        deflatedContent = gzip.GzipFile(fileobj=buffer)
        data = deflatedContent.read()
    else:
        data = request.read()

    # If the server supplied an ETag, cache much longer — the tag lets
    # us revalidate cheaply via If-None-Match on the next request.
    exp = datetime.timedelta(minutes=_cacheMinutes)
    if request.info().get('ETag'):
        tag = request.info().get('ETag')
        exp = datetime.timedelta(days=200)

    if decrypt:
        data = decryptBase64StringToStringss(data, _xxtea_key)

    common_cache.set(url, {'data': data, 'tag': tag}, expiration=exp)

    log('getData (' + _elapsed_ms() + 'ms) ' + str(url), 'Debug')
    return json.loads(data)