def set_proxy(proxy, user=None, password=""):
    """
    Set the HTTP proxy for Python to download through.

    If ``proxy`` is None then tries to set proxy from environment or system
    settings.

    The resulting opener is installed globally with ``install_opener``, so it
    is used by every later ``urlopen`` call in the process.

    :param proxy: The HTTP proxy server to use. For example:
        'http://proxy.example.com:3128/'
    :param user: The username to authenticate with. Use None to disable
        authentication.
    :param password: The password to authenticate with.
    :raises ValueError: If ``proxy`` is None and no "http" entry is present
        in the detected proxy settings.
    """
    if proxy is None:
        # Try and find the system proxy settings
        try:
            proxy = getproxies()["http"]
        except KeyError as err:
            # Chain the lookup failure so the traceback shows the root cause.
            raise ValueError("Could not detect default proxy settings") from err

    # Set up the proxy handler; the same proxy serves both URL schemes.
    proxy_handler = ProxyHandler({"https": proxy, "http": proxy})
    opener = build_opener(proxy_handler)

    if user is not None:
        # Set up basic and digest proxy authentication if provided
        password_manager = HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password)
        opener.add_handler(ProxyBasicAuthHandler(password_manager))
        opener.add_handler(ProxyDigestAuthHandler(password_manager))

    # Override the existing url opener
    install_opener(opener)
from urllib.request import HTTPPasswordMgrWithDefaultRealm as mgr
from urllib.request import (
    ProxyBasicAuthHandler,
    ProxyHandler,
    Request,
    build_opener,
    install_opener,
    urlopen,
)

# Demo script: fetch a page through an authenticated HTTP proxy and print it.

# NOTE(review): proxy credentials are hard-coded in source; consider loading
# them from the environment or a config file that is not committed.
user = '******'
passwd = 'vch6gryw'
proxyserver = '117.48.199.230:16816'

# Build a password manager that stores the credentials above.
passwdmgr = mgr()
# The first argument is the realm of the remote server; None registers the
# credentials as the default for any realm.
passwdmgr.add_password(None, proxyserver, user, passwd)

# ProxyHandler is what actually routes requests through the proxy server.
# ProxyBasicAuthHandler only answers the proxy's 407 authentication challenge;
# on its own build_opener() would fall back to the default ProxyHandler and
# the proxy configured above would never be contacted.
proxy_handler = ProxyHandler({'http': 'http://' + proxyserver})
proxy_auth_handler = ProxyBasicAuthHandler(passwdmgr)

opener = build_opener(proxy_handler, proxy_auth_handler)
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
)]
# Make this opener the one used by the module-level urlopen().
install_opener(opener)

url = 'http://www.ipdizhichaxun.com/'
req = Request(url)
# Close the connection as soon as the body has been read.
with urlopen(req) as response:
    html = response.read().decode('utf-8')
print(html)
def setup_opener(self, url, timeout):
    """
    Sets up a urllib OpenerDirector to be used for requests. There is a
    fair amount of custom urllib code in Package Control, and part of it
    is to handle proxies and keep-alives. Creating an opener the way
    below is because the handlers have been customized to send the
    "Connection: Keep-Alive" header and hold onto connections so they
    can be re-used.

    The opener is built only once and stored on ``self.opener``; when an
    opener is already present this method does nothing.

    :param url:
        The URL to download. An ``https://`` URL selects the validating
        HTTPS handler, anything else the plain HTTP handler.

    :param timeout:
        The int number of seconds to set the timeout to. Here it is only
        passed on to ``self.check_certs``.
    """

    if self.opener:
        return

    http_proxy = self.settings.get('http_proxy')
    https_proxy = self.settings.get('https_proxy')
    if http_proxy or https_proxy:
        proxies = {}
        if http_proxy:
            proxies['http'] = http_proxy
        if https_proxy:
            proxies['https'] = https_proxy
        proxy_handler = ProxyHandler(proxies)
    else:
        # No proxy in the settings: construct the handler without an
        # explicit proxies mapping.
        proxy_handler = ProxyHandler()

    password_manager = HTTPPasswordMgrWithDefaultRealm()
    proxy_username = self.settings.get('proxy_username')
    proxy_password = self.settings.get('proxy_password')
    # Credentials are registered only when both parts are set, and only for
    # the proxies that are actually configured.
    if proxy_username and proxy_password:
        if http_proxy:
            password_manager.add_password(None, http_proxy, proxy_username,
                                          proxy_password)
        if https_proxy:
            password_manager.add_password(None, https_proxy, proxy_username,
                                          proxy_password)

    handlers = [proxy_handler]

    basic_auth_handler = ProxyBasicAuthHandler(password_manager)
    digest_auth_handler = ProxyDigestAuthHandler(password_manager)
    handlers.extend([digest_auth_handler, basic_auth_handler])

    debug = self.settings.get('debug')

    if debug:
        console_write(u"Urllib Debug Proxy", True)
        console_write(u"  http_proxy: %s" % http_proxy)
        console_write(u"  https_proxy: %s" % https_proxy)
        console_write(u"  proxy_username: %s" % proxy_username)
        # NOTE(review): this writes the proxy password to the console in
        # clear text; consider masking it.
        console_write(u"  proxy_password: %s" % proxy_password)

    secure_url_match = re.match(r'^https://([^/]+)', url)
    if secure_url_match is not None:
        # HTTPS: resolve the CA bundle for this domain before building the
        # certificate-validating handler.
        secure_domain = secure_url_match.group(1)
        bundle_path = self.check_certs(secure_domain, timeout)
        bundle_path = bundle_path.encode(sys.getfilesystemencoding())
        handlers.append(
            ValidatingHTTPSHandler(
                ca_certs=bundle_path,
                debug=debug,
                passwd=password_manager,
                user_agent=self.settings.get('user_agent')))
    else:
        handlers.append(
            DebuggableHTTPHandler(debug=debug, passwd=password_manager))
    self.opener = build_opener(*handlers)
from urllib.request import (
    HTTPPasswordMgrWithDefaultRealm,
    ProxyBasicAuthHandler,
    ProxyHandler,
    Request,
    build_opener,
)

# Demo script: fetch a page through an authenticated HTTP proxy and print the
# raw response body.

# NOTE(review): proxy credentials are hard-coded in source; consider loading
# them from the environment or a config file that is not committed.
user = '******'
password = '******'
server_host = '114.67.224.159:16816'

# Register the credentials for the proxy; realm None makes them the default.
password_mgr = HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, server_host, user, password)

# ProxyHandler is what routes requests through the proxy server.
# ProxyBasicAuthHandler only answers the proxy's 407 authentication challenge;
# without an explicit ProxyHandler, build_opener() uses the default one and
# server_host would never be contacted. The auth handler also gets its own
# name instead of shadowing the ProxyHandler class.
proxy_handler = ProxyHandler({'http': 'http://' + server_host})
proxy_auth_handler = ProxyBasicAuthHandler(password_mgr)
opener = build_opener(proxy_handler, proxy_auth_handler)

url = 'http://www.atguigu.com'
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
request = Request(url, headers=headers)
# Close the connection as soon as the body has been read.
with opener.open(request) as response:
    data = response.read()
print(data)
from urllib.request import ProxyHandler, ProxyBasicAuthHandler, build_opener

# Illustrative example only (reportedly sample code for explanation): the
# realm, host and credentials below are placeholders.

# Handler constructed without an explicit proxies mapping.
default_proxy_handler = ProxyHandler()

# Basic-auth handler for the proxy, with one set of credentials registered.
basic_auth_handler = ProxyBasicAuthHandler()
basic_auth_handler.add_password(
    realm='realm',
    uri='host',
    user='username',
    passwd='password',
)

# Combine both handlers into one opener and issue the request.
handler_chain = (default_proxy_handler, basic_auth_handler)
opener = build_opener(*handler_chain)
response = opener.open("http://www.example.com/login.html")
# webserver_host_port = {"http":"trygf521:[email protected]:16816"} #代理服务器ip+端口 webserver_host_port = "114.67.224.159:16816" #用户 user = "******" #密码 password = "******" #第一个参数:None http_password_mgr.add_password(None,webserver_host_port,user,password) #代理服务器类 proxy_handler = ProxyBasicAuthHandler(http_password_mgr) #自定义opener opener = build_opener(proxy_handler) url = "http://www.atguigu.com" #模拟浏览器请求头信息 head = { "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36" } request = Request(url,headers=head) #HttpRespnse实例对象 response = opener.open(request)
def _spider_error_result(url, method, headers, error, nettime,
                         response_headers=None, http_code=0):
    """Build the result dict that spiderRequest returns for a failed request."""
    return {
        'url': url,
        'method': method,
        'request_headers': headers,
        'response_headers': {} if response_headers is None else response_headers,
        'http_code': http_code,
        'redirects': [],
        'body': b'',
        'nettime': nettime,
        'error': repr(error)
    }


def spiderRequest(url=None,
                  method="GET",
                  data=None,
                  headers=None,
                  timeout=10,
                  auth=None,
                  proxy=None):
    """
    Perform one HTTP request and describe the outcome as a dict.

    No exception escapes for request failures: HTTP errors, URL errors,
    timeouts and any other exception are logged and reported in the returned
    dict instead.

    :param url: The URL to request.
    :param method: HTTP method name; it is upper-cased before use.
    :param data: Accepted for interface compatibility.
        NOTE(review): this value is never attached to the request - confirm
        whether a request body was intended.
    :param headers: Mapping of extra request headers. It is copied, so the
        caller's mapping is not modified; ``Cache-Control: no-cache`` is
        always added.
    :param timeout: Timeout in seconds passed to the opener.
    :param auth: Optional mapping with ``user`` and ``passwd`` keys enabling
        HTTP basic authentication for ``url``.
    :param proxy: Optional mapping with a ``url`` key (proxy for ``http``
        requests) and optional ``user`` / ``passwd`` keys for proxy basic
        authentication.
    :return: A dict with the keys ``url``, ``method``, ``request_headers``,
        ``response_headers``, ``http_code``, ``redirects``, ``body``,
        ``nettime`` and ``error``. On failure ``body`` is ``b''`` and
        ``error`` holds ``repr()`` of the exception; ``http_code`` is the
        HTTP status for ``HTTPError`` and 0 for every other failure.
    """
    # Work on private copies: the previous mutable defaults were shared
    # between calls and the caller's headers dict was modified in place.
    headers = dict(headers) if headers else {}
    headers['Cache-Control'] = 'no-cache'
    auth = auth or {}
    proxy = proxy or {}

    method = method.upper()
    start = time()
    try:
        # Records the redirects that are followed
        redirect_handler = RedirectHandler()

        # HTTP basic authentication
        if 'user' in auth and 'passwd' in auth:
            auth_passwords = HTTPPasswordMgrWithDefaultRealm()
            auth_passwords.add_password(realm=None,
                                        uri=url,
                                        user=auth['user'],
                                        passwd=auth['passwd'])
            auth_handler = HTTPBasicAuthHandler(auth_passwords)
        else:
            auth_handler = HTTPBasicAuthHandler()

        # Proxy (applied to the "http" scheme only)
        has_proxy = 'url' in proxy
        if has_proxy:
            proxy_handler = ProxyHandler({'http': proxy['url']})
        else:
            proxy_handler = ProxyHandler()

        # Proxy basic authentication
        if has_proxy and 'user' in proxy and 'passwd' in proxy:
            proxy_passwords = HTTPPasswordMgrWithDefaultRealm()
            proxy_passwords.add_password(realm=None,
                                         uri=proxy['url'],
                                         user=proxy['user'],
                                         passwd=proxy['passwd'])
            proxy_auth_handler = ProxyBasicAuthHandler(proxy_passwords)
        else:
            proxy_auth_handler = ProxyBasicAuthHandler()

        opener = build_opener(redirect_handler, auth_handler, proxy_handler,
                              proxy_auth_handler)

        request_handler = Request(quote(url, safe=string.printable),
                                  method=method)
        for key, value in headers.items():
            request_handler.add_header(key, value)

        # The context manager closes the connection once the body is read.
        with opener.open(request_handler, timeout=timeout) as response:
            # The elapsed time is taken before the body is read.
            end = time()
            return {
                'url': url,
                'method': method,
                'request_headers': request_handler.headers,
                'response_headers': formatHeaders(response.getheaders()),
                'http_code': response.status,
                'redirects': redirect_handler.redirects,
                'body': response.read(),
                'nettime': end - start,
                'error': ''
            }
    except HTTPError as e:
        # 400 401 402 403 500 501 502 503 504
        logger.error("%s::::::::%r", url, e)
        return _spider_error_result(url, method, headers, e, time() - start,
                                    response_headers=dict(e.headers),
                                    http_code=e.code)
    except (URLError, timeoutError) as e:
        logger.error("%s::::::::%r", url, e)
        return _spider_error_result(url, method, headers, e, time() - start)
    except Exception as e:
        # Last-resort boundary: log the traceback and report the failure
        # with a zero elapsed time.
        logger.exception(e)
        logger.error("%s::::::::%r", url, e)
        return _spider_error_result(url, method, headers, e, 0)
from urllib.request import (
    HTTPPasswordMgrWithDefaultRealm,
    ProxyBasicAuthHandler,
    ProxyHandler,
    Request,
    build_opener,
)

# Demo script: fetch a page through an authenticated HTTP proxy and print it.

http_mgr_real = HTTPPasswordMgrWithDefaultRealm()
# NOTE(review): proxy credentials are hard-coded in source; consider loading
# them from the environment or a config file that is not committed.
user = '******'
passwd = 'sunfengchun'
pro = '97.64.107.241:8899'
# Realm None registers the credentials as the default for the proxy.
http_mgr_real.add_password(None, pro, user, passwd)

# ProxyHandler is what routes requests through the proxy server.
# ProxyBasicAuthHandler only answers the proxy's 407 authentication challenge;
# without an explicit ProxyHandler, build_opener() uses the default one and
# the proxy above would never be contacted.
pro_handler = ProxyHandler({'http': 'http://' + pro})
pro_auth = ProxyBasicAuthHandler(http_mgr_real)
opener = build_opener(pro_handler, pro_auth)

request = Request('http://www.baidu.com')
# Close the connection as soon as the body has been read.
with opener.open(request) as response:
    print(response.read().decode('utf-8'))