def open_url(url, data=None, headers=None, method=None, use_proxy=True, force=False, last_mod_time=None, timeout=10, validate_certs=True, url_username=None, url_password=None, http_agent=None, force_basic_auth=False, follow_redirects='urllib2', client_cert=None, client_key=None, cookies=None): ''' Sends a request via HTTP(S) or FTP using urllib2 (Python2) or urllib (Python3) Does not require the module environment ''' handlers = [] ssl_handler = maybe_add_ssl_handler(url, validate_certs) if ssl_handler: handlers.append(ssl_handler) # FIXME: change the following to use the generic_urlparse function # to remove the indexed references for 'parsed' parsed = urlparse(url) if parsed[0] != 'ftp': username = url_username if headers is None: headers = {} if username: password = url_password netloc = parsed[1] elif '@' in parsed[1]: credentials, netloc = parsed[1].split('@', 1) if ':' in credentials: username, password = credentials.split(':', 1) else: username = credentials password = '' parsed = list(parsed) parsed[1] = netloc # reconstruct url without credentials url = urlunparse(parsed) if username and not force_basic_auth: passman = urllib_request.HTTPPasswordMgrWithDefaultRealm() # this creates a password manager passman.add_password(None, netloc, username, password) # because we have put None at the start it will always # use this username/password combination for urls # for which `theurl` is a super-url authhandler = urllib_request.HTTPBasicAuthHandler(passman) digest_authhandler = urllib_request.HTTPDigestAuthHandler(passman) # create the AuthHandler handlers.append(authhandler) handlers.append(digest_authhandler) elif username and force_basic_auth: headers["Authorization"] = basic_auth_header(username, password) else: try: rc = netrc.netrc(os.environ.get('NETRC')) login = rc.authenticators(parsed[1]) except IOError: login = None if login: username, _, password = login if username and password: headers["Authorization"] = basic_auth_header( username, password) if not use_proxy: proxyhandler = urllib_request.ProxyHandler({}) handlers.append(proxyhandler) if HAS_SSLCONTEXT and not validate_certs: # In 2.7.9, the default context validates certificates context = SSLContext(ssl.PROTOCOL_SSLv23) context.options |= ssl.OP_NO_SSLv2 context.options |= ssl.OP_NO_SSLv3 context.verify_mode = ssl.CERT_NONE context.check_hostname = False handlers.append( HTTPSClientAuthHandler(client_cert=client_cert, client_key=client_key, context=context)) elif client_cert: handlers.append( HTTPSClientAuthHandler(client_cert=client_cert, client_key=client_key)) # pre-2.6 versions of python cannot use the custom https # handler, since the socket class is lacking create_connection. # Some python builds lack HTTPS support. if hasattr(socket, 'create_connection') and CustomHTTPSHandler: handlers.append(CustomHTTPSHandler) handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs)) # add some nicer cookie handling if cookies is not None: handlers.append(urllib_request.HTTPCookieProcessor(cookies)) opener = urllib_request.build_opener(*handlers) urllib_request.install_opener(opener) data = to_bytes(data, nonstring='passthru') if method: if method.upper() not in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE', 'CONNECT', 'PATCH'): raise ConnectionError('invalid HTTP request method; %s' % method.upper()) request = RequestWithMethod(url, method.upper(), data) else: request = urllib_request.Request(url, data) # add the custom agent header, to help prevent issues # with sites that block the default urllib agent string if http_agent: request.add_header('User-agent', http_agent) # Cache control # Either we directly force a cache refresh if force: request.add_header('cache-control', 'no-cache') # or we do it if the original is more recent than our copy elif last_mod_time: tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000') request.add_header('If-Modified-Since', tstamp) # user defined headers now, which may override things we've set above if headers: if not isinstance(headers, dict): raise ValueError("headers provided to fetch_url() must be a dict") for header in headers: request.add_header(header, headers[header]) urlopen_args = [request, None] if sys.version_info >= (2, 6, 0): # urlopen in python prior to 2.6.0 did not # have a timeout parameter urlopen_args.append(timeout) r = urllib_request.urlopen(*urlopen_args) return r
def open_url(url, data=None, headers=None, method=None, use_proxy=True, force=False, last_mod_time=None, timeout=10, validate_certs=True, url_username=None, url_password=None, http_agent=None, force_basic_auth=False, follow_redirects='urllib2'): ''' Sends a request via HTTP(S) or FTP using urllib2 (Python2) or urllib (Python3) Does not require the module environment ''' handlers = [] ssl_handler = maybe_add_ssl_handler(url, validate_certs) if ssl_handler: handlers.append(ssl_handler) # FIXME: change the following to use the generic_urlparse function # to remove the indexed references for 'parsed' parsed = urlparse(url) if parsed[0] != 'ftp': username = url_username if headers is None: headers = {} if username: password = url_password netloc = parsed[1] elif '@' in parsed[1]: credentials, netloc = parsed[1].split('@', 1) if ':' in credentials: username, password = credentials.split(':', 1) else: username = credentials password = '' parsed = list(parsed) parsed[1] = netloc # reconstruct url without credentials url = urlunparse(parsed) if username and not force_basic_auth: passman = urllib_request.HTTPPasswordMgrWithDefaultRealm() # this creates a password manager passman.add_password(None, netloc, username, password) # because we have put None at the start it will always # use this username/password combination for urls # for which `theurl` is a super-url authhandler = urllib_request.HTTPBasicAuthHandler(passman) # create the AuthHandler handlers.append(authhandler) elif username and force_basic_auth: headers["Authorization"] = basic_auth_header(username, password) else: try: rc = netrc.netrc(os.environ.get('NETRC')) login = rc.authenticators(parsed[1]) except IOError: login = None if login: username, _, password = login if username and password: headers["Authorization"] = basic_auth_header(username, password) if not use_proxy: proxyhandler = urllib_request.ProxyHandler({}) handlers.append(proxyhandler) if HAS_SSLCONTEXT and not validate_certs: # In 2.7.9, the default context validates certificates context = SSLContext(ssl.PROTOCOL_SSLv23) context.options |= ssl.OP_NO_SSLv2 context.options |= ssl.OP_NO_SSLv3 context.verify_mode = ssl.CERT_NONE context.check_hostname = False handlers.append(urllib_request.HTTPSHandler(context=context)) # pre-2.6 versions of python cannot use the custom https # handler, since the socket class is lacking create_connection. # Some python builds lack HTTPS support. if hasattr(socket, 'create_connection') and CustomHTTPSHandler: handlers.append(CustomHTTPSHandler) handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs)) opener = urllib_request.build_opener(*handlers) urllib_request.install_opener(opener) if method: if method.upper() not in ('OPTIONS','GET','HEAD','POST','PUT','DELETE','TRACE','CONNECT','PATCH'): raise ConnectionError('invalid HTTP request method; %s' % method.upper()) request = RequestWithMethod(url, method.upper(), data) else: request = urllib_request.Request(url, data) # add the custom agent header, to help prevent issues # with sites that block the default urllib agent string request.add_header('User-agent', http_agent) # if we're ok with getting a 304, set the timestamp in the # header, otherwise make sure we don't get a cached copy if last_mod_time and not force: tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000') request.add_header('If-Modified-Since', tstamp) else: request.add_header('cache-control', 'no-cache') # user defined headers now, which may override things we've set above if headers: if not isinstance(headers, dict): raise ValueError("headers provided to fetch_url() must be a dict") for header in headers: request.add_header(header, headers[header]) urlopen_args = [request, None] if sys.version_info >= (2,6,0): # urlopen in python prior to 2.6.0 did not # have a timeout parameter urlopen_args.append(timeout) r = urllib_request.urlopen(*urlopen_args) return r