示例#1
0
def open_url(url,
             data=None,
             headers=None,
             method=None,
             use_proxy=True,
             force=False,
             last_mod_time=None,
             timeout=10,
             validate_certs=True,
             url_username=None,
             url_password=None,
             http_agent=None,
             force_basic_auth=False,
             follow_redirects='urllib2',
             client_cert=None,
             client_key=None,
             cookies=None):
    '''
    Sends a request via HTTP(S) or FTP using urllib2 (Python2) or urllib (Python3)

    Does not require the module environment
    '''
    handlers = []
    ssl_handler = maybe_add_ssl_handler(url, validate_certs)
    if ssl_handler:
        handlers.append(ssl_handler)

    # FIXME: change the following to use the generic_urlparse function
    #        to remove the indexed references for 'parsed'
    parsed = urlparse(url)
    if parsed[0] != 'ftp':
        username = url_username

        if headers is None:
            headers = {}

        if username:
            password = url_password
            netloc = parsed[1]
        elif '@' in parsed[1]:
            credentials, netloc = parsed[1].split('@', 1)
            if ':' in credentials:
                username, password = credentials.split(':', 1)
            else:
                username = credentials
                password = ''

            parsed = list(parsed)
            parsed[1] = netloc

            # reconstruct url without credentials
            url = urlunparse(parsed)

        if username and not force_basic_auth:
            passman = urllib_request.HTTPPasswordMgrWithDefaultRealm()

            # this creates a password manager
            passman.add_password(None, netloc, username, password)

            # because we have put None at the start it will always
            # use this username/password combination for  urls
            # for which `theurl` is a super-url
            authhandler = urllib_request.HTTPBasicAuthHandler(passman)
            digest_authhandler = urllib_request.HTTPDigestAuthHandler(passman)

            # create the AuthHandler
            handlers.append(authhandler)
            handlers.append(digest_authhandler)

        elif username and force_basic_auth:
            headers["Authorization"] = basic_auth_header(username, password)

        else:
            try:
                rc = netrc.netrc(os.environ.get('NETRC'))
                login = rc.authenticators(parsed[1])
            except IOError:
                login = None

            if login:
                username, _, password = login
                if username and password:
                    headers["Authorization"] = basic_auth_header(
                        username, password)

    if not use_proxy:
        proxyhandler = urllib_request.ProxyHandler({})
        handlers.append(proxyhandler)

    if HAS_SSLCONTEXT and not validate_certs:
        # In 2.7.9, the default context validates certificates
        context = SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.options |= ssl.OP_NO_SSLv3
        context.verify_mode = ssl.CERT_NONE
        context.check_hostname = False
        handlers.append(
            HTTPSClientAuthHandler(client_cert=client_cert,
                                   client_key=client_key,
                                   context=context))
    elif client_cert:
        handlers.append(
            HTTPSClientAuthHandler(client_cert=client_cert,
                                   client_key=client_key))

    # pre-2.6 versions of python cannot use the custom https
    # handler, since the socket class is lacking create_connection.
    # Some python builds lack HTTPS support.
    if hasattr(socket, 'create_connection') and CustomHTTPSHandler:
        handlers.append(CustomHTTPSHandler)

    handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs))

    # add some nicer cookie handling
    if cookies is not None:
        handlers.append(urllib_request.HTTPCookieProcessor(cookies))

    opener = urllib_request.build_opener(*handlers)
    urllib_request.install_opener(opener)

    data = to_bytes(data, nonstring='passthru')
    if method:
        if method.upper() not in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                  'DELETE', 'TRACE', 'CONNECT', 'PATCH'):
            raise ConnectionError('invalid HTTP request method; %s' %
                                  method.upper())
        request = RequestWithMethod(url, method.upper(), data)
    else:
        request = urllib_request.Request(url, data)

    # add the custom agent header, to help prevent issues
    # with sites that block the default urllib agent string
    if http_agent:
        request.add_header('User-agent', http_agent)

    # Cache control
    # Either we directly force a cache refresh
    if force:
        request.add_header('cache-control', 'no-cache')
    # or we do it if the original is more recent than our copy
    elif last_mod_time:
        tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
        request.add_header('If-Modified-Since', tstamp)

    # user defined headers now, which may override things we've set above
    if headers:
        if not isinstance(headers, dict):
            raise ValueError("headers provided to fetch_url() must be a dict")
        for header in headers:
            request.add_header(header, headers[header])

    urlopen_args = [request, None]
    if sys.version_info >= (2, 6, 0):
        # urlopen in python prior to 2.6.0 did not
        # have a timeout parameter
        urlopen_args.append(timeout)

    r = urllib_request.urlopen(*urlopen_args)
    return r
示例#2
0
def open_url(url, data=None, headers=None, method=None, use_proxy=True,
             force=False, last_mod_time=None, timeout=10, validate_certs=True,
             url_username=None, url_password=None, http_agent=None,
             force_basic_auth=False, follow_redirects='urllib2'):
    '''
    Sends a request via HTTP(S) or FTP using urllib2 (Python2) or urllib (Python3)

    Does not require the module environment
    '''
    handlers = []
    ssl_handler = maybe_add_ssl_handler(url, validate_certs)
    if ssl_handler:
        handlers.append(ssl_handler)

    # FIXME: change the following to use the generic_urlparse function
    #        to remove the indexed references for 'parsed'
    parsed = urlparse(url)
    if parsed[0] != 'ftp':
        username = url_username

        if headers is None:
            headers = {}

        if username:
            password = url_password
            netloc = parsed[1]
        elif '@' in parsed[1]:
            credentials, netloc = parsed[1].split('@', 1)
            if ':' in credentials:
                username, password = credentials.split(':', 1)
            else:
                username = credentials
                password = ''

            parsed = list(parsed)
            parsed[1] = netloc

            # reconstruct url without credentials
            url = urlunparse(parsed)

        if username and not force_basic_auth:
            passman = urllib_request.HTTPPasswordMgrWithDefaultRealm()

            # this creates a password manager
            passman.add_password(None, netloc, username, password)

            # because we have put None at the start it will always
            # use this username/password combination for  urls
            # for which `theurl` is a super-url
            authhandler = urllib_request.HTTPBasicAuthHandler(passman)

            # create the AuthHandler
            handlers.append(authhandler)

        elif username and force_basic_auth:
            headers["Authorization"] = basic_auth_header(username, password)

        else:
            try:
                rc = netrc.netrc(os.environ.get('NETRC'))
                login = rc.authenticators(parsed[1])
            except IOError:
                login = None

            if login:
                username, _, password = login
                if username and password:
                    headers["Authorization"] = basic_auth_header(username, password)

    if not use_proxy:
        proxyhandler = urllib_request.ProxyHandler({})
        handlers.append(proxyhandler)

    if HAS_SSLCONTEXT and not validate_certs:
        # In 2.7.9, the default context validates certificates
        context = SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.options |= ssl.OP_NO_SSLv3
        context.verify_mode = ssl.CERT_NONE
        context.check_hostname = False
        handlers.append(urllib_request.HTTPSHandler(context=context))

    # pre-2.6 versions of python cannot use the custom https
    # handler, since the socket class is lacking create_connection.
    # Some python builds lack HTTPS support.
    if hasattr(socket, 'create_connection') and CustomHTTPSHandler:
        handlers.append(CustomHTTPSHandler)

    handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs))

    opener = urllib_request.build_opener(*handlers)
    urllib_request.install_opener(opener)

    if method:
        if method.upper() not in ('OPTIONS','GET','HEAD','POST','PUT','DELETE','TRACE','CONNECT','PATCH'):
            raise ConnectionError('invalid HTTP request method; %s' % method.upper())
        request = RequestWithMethod(url, method.upper(), data)
    else:
        request = urllib_request.Request(url, data)

    # add the custom agent header, to help prevent issues
    # with sites that block the default urllib agent string
    request.add_header('User-agent', http_agent)

    # if we're ok with getting a 304, set the timestamp in the
    # header, otherwise make sure we don't get a cached copy
    if last_mod_time and not force:
        tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
        request.add_header('If-Modified-Since', tstamp)
    else:
        request.add_header('cache-control', 'no-cache')

    # user defined headers now, which may override things we've set above
    if headers:
        if not isinstance(headers, dict):
            raise ValueError("headers provided to fetch_url() must be a dict")
        for header in headers:
            request.add_header(header, headers[header])

    urlopen_args = [request, None]
    if sys.version_info >= (2,6,0):
        # urlopen in python prior to 2.6.0 did not
        # have a timeout parameter
        urlopen_args.append(timeout)

    r = urllib_request.urlopen(*urlopen_args)
    return r