def getOpener(self):
    if self.Proxy is None:
        self.opener = urllib.request.build_opener(
            MultipartPostHandler.MultipartPostHandler,
            urllib.request.HTTPHandler(debuglevel=self.enableDebugging))
    else:
        self.opener = urllib.request.build_opener(
            self.Proxy,
            MultipartPostHandler.MultipartPostHandler,
            urllib.request.HTTPHandler(debuglevel=self.enableDebugging))
    return self.opener
def crawler():
    while not q.empty():                          # loop until the queue is drained
        path = q.get()                            # take one path from queue q
        url = "%s%s" % (domain_name, path)        # build the URL for the next request
        random_proxy = random.choice(proxy_list)  # pick a random proxy server
        proxy_support = urllib.request.ProxyHandler(random_proxy)
        opener = urllib.request.build_opener(proxy_support)
        urllib.request.install_opener(opener)
        headers = {}
        headers['User-Agent'] = Baidu_spider      # spider User-Agent header
        # source: www.iplaypython.com
        request = urllib.request.Request(url, headers=headers)
        try:
            response = urllib.request.urlopen(request)
            content = response.read()
            if len(content):
                # report the status code and path when the body is not empty
                print("Status [%s] - path: %s" % (response.code, path))
            response.close()
            time.sleep(1)  # short pause so the crawl speed does not get the IP banned
        except urllib.error.HTTPError as e:
            # print(e.code, path)
            pass  # exception handling; just skip failures for now
def auto_login_cmd(self):
    """Tries to authenticate the user via the parameters provided."""
    # the support is still rudimentary
    target = self.cli_options.target
    # this is the url-encoded data
    # too much variety for us to handle different input tag names :/
    parameters = self.cli_options.parameters
    handler = urllib.request.HTTPHandler()
    # create an OpenerDirector instance
    opener = urllib.request.build_opener(handler)
    # build a request; supplying data makes it a POST
    request = urllib.request.Request(target, data=parameters)
    # add appropriate content headers
    request.add_header("Content-Type", 'application/x-www-form-urlencoded')
    request.get_method = lambda: 'POST'
    try:
        conn = opener.open(request)
    except urllib.error.HTTPError as e:
        conn = e
    if conn.code == 200:
        page_src = conn.read()
    else:
        sys.exit("Auto_login failed!")
    if page_src:
        # check for the login-successful pattern
        match = re.search(self.check_pattern, page_src)
        if match:
            print("Login successful")
        else:
            print("login unsuccessful!")
def __init__(self, conf, dry_run=False):
    self.conf = conf
    self.regions = regions.Region.load_regions(conf=conf)
    self.arbor_endpoint = self.conf.get('global', 'arbor_api_url')
    self._arbor_username = self.conf.get('global', 'arbor_api_user')
    self._arbor_password = self.conf.get('global', 'arbor_api_pass')
    self.arbor_wsdl = '/'.join(
        ['file:/', os.path.dirname(os.path.realpath(__file__)),
         'wsdl', 'PeakflowSP.wsdl']
    )
    if dry_run:
        self.arbor_client = mock.MagicMock()
    else:
        t = suds.transport.https.HttpAuthenticated(
            username=self._arbor_username,
            password=self._arbor_password
        )
        t.handler = urllib2.HTTPDigestAuthHandler(t.pm)
        t.urlopener = urllib2.build_opener(t.handler)
        self.arbor_client = suds.client.Client(
            self.arbor_wsdl,
            location=urlparse.urljoin(self.arbor_endpoint, "/soap/sp"),
            transport=t
        )
    self.default_replacements = {
        'arbor_api_key': conf.get('global', 'arbor_api_key'),
        'arbor_api_url': conf.get('global', 'arbor_api_url')
    }
    self._new_mitigations = None
    self._ongoing_mitigations = None
    self._expired_mitigations = None
def __init__(self):
    self.cookie_j = cookiejar.CookieJar()
    cookie_h = request.HTTPCookieProcessor(self.cookie_j)
    self.opener = request.build_opener(cookie_h)
    # self.opener = request.FancyURLopener()
    self.opener.addheaders = [('User-agent', random.choice(AGENTS))]
    if 'init' in dir(self):
        self.init()
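# Minimal standalone sketch (assumption, not from the original source): the same
# cookie-aware opener pattern as above, shown outside the class. The URL is a
# placeholder; any Set-Cookie headers end up in the jar.
from http import cookiejar
from urllib import request

jar = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(jar))
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
resp = opener.open('http://example.com/')
print(resp.status, [c.name for c in jar])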
def __init__(self, username, passwd, debug=False):
    (self.username, self.passwd) = (username, passwd)
    self.cookie = cookielib.CookieJar()
    cookieHandler = urllib2.HTTPCookieProcessor(self.cookie)
    self.is_debug = debug
    if self.is_debug:
        httpHandler = urllib2.HTTPHandler(debuglevel=1)
        httpsHandler = urllib2.HTTPSHandler(debuglevel=1)
        opener = urllib2.build_opener(cookieHandler, httpHandler, httpsHandler)
    else:
        opener = urllib2.build_opener(cookieHandler)
    urllib2.install_opener(opener)
    self.last_url = ''
def fetch(self, server):
    """
    This function gets your IP from a specific server
    """
    t = None
    socket_default_timeout = socket.getdefaulttimeout()
    opener = urllib.build_opener()
    opener.addheaders = [("User-agent",
                          "Mozilla/5.0 (X11; Linux x86_64; rv:24.0)"
                          " Gecko/20100101 Firefox/24.0")]

    try:
        # Close url resource if fetching not finished within timeout.
        t = Timer(self.timeout, self.handle_timeout, [self.url])
        t.start()

        # Open URL.
        if version_info[0:2] == (2, 5):
            # Support for Python 2.5.* using socket hack
            # (Changes global socket timeout.)
            socket.setdefaulttimeout(self.timeout)
            self.url = opener.open(server)
        else:
            self.url = opener.open(server, timeout=self.timeout)

        # Read response.
        content = self.url.read()

        # Didn't want to import chardet. Preferred to stick to stdlib
        if PY3K:
            try:
                content = content.decode("UTF-8")
            except UnicodeDecodeError:
                content = content.decode("ISO-8859-1")

        p = r"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.("
        p += r"25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|["
        p += r"01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
        m = re.search(p, content)
        myip = m.group(0)
        if len(myip) > 0:
            return myip
        else:
            return ""
    except Exception as e:
        print(e)
        return ""
    finally:
        if self.url is not None:
            self.url.close()
            self.url = None
        if t is not None:
            t.cancel()

        # Reset default socket timeout.
        if socket.getdefaulttimeout() != socket_default_timeout:
            socket.setdefaulttimeout(socket_default_timeout)
def setupProxy():
    proxies = {}
    if _proxyhttp is not None:
        proxies['http'] = 'http://' + _proxyhttp
        os.environ['http'] = _proxyhttp
    if _proxyhttps is not None:
        proxies['https'] = _proxyhttps
        os.environ['https'] = 'http://' + _proxyhttps
    if proxies != {}:
        proxy = urllib.request.ProxyHandler(proxies)
        opener = urllib.request.build_opener(proxy)
        urllib.request.install_opener(opener)
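# Usage sketch (assumption, not from the original source): with the module-level
# proxy strings set, setupProxy() installs a global opener so later urlopen() calls
# go through the proxy. The host/port below is a hypothetical local proxy.
_proxyhttp = '127.0.0.1:3128'
_proxyhttps = None
setupProxy()
print(urllib.request.urlopen('http://example.com/').read()[:80])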
def getRandomMUID(self):
    opener = urllib.request.build_opener(urllib.request.HTTPErrorProcessor(),
                                         urllib.request.HTTPRedirectHandler(),
                                         urllib.request.HTTPHandler())
    request = urllib.request.Request(
        "http://gatherer.wizards.com/Pages/Card/Details.aspx?action=random")
    c = 0
    while True:
        try:
            response = opener.open(request)
        except Exception as e:
            i = int(e.__dict__['url'].split('=')[1])
            if i not in self.__used_muids__:
                yield i
            elif c > 100:
                # this means at least 99.9% completion.
                return
            else:
                c += 1
def post(body):
    '''
    This function actually communicates with the FreshBooks API
    '''
    # setup HTTP basic authentication
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    url = ""
    if account_url.find('//') == -1:
        url = "https://"
    url += account_url + SERVICE_URL
    password_mgr.add_password(None, url, auth_token, '')
    handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
    opener = urllib.request.build_opener(handler)
    urllib.request.install_opener(opener)

    # make the request and return the response body
    request = urllib.request.Request(url, body, request_headers)
    response = urllib.request.urlopen(request)
    response_content = response.read()
    return response_content
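# Usage sketch (assumption): the FreshBooks classic API takes an XML request body;
# the method name below is illustrative only, and account_url, SERVICE_URL,
# auth_token and request_headers are assumed to be configured at module level.
xml_body = b'<?xml version="1.0" encoding="utf-8"?><request method="invoice.list"></request>'
print(post(xml_body))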
def retrieve(url, outfile, opener=None, overwrite=False):
    """
    "retrieve" (i.e., download to file) a URL.
    """
    if opener is None:
        opener = urllib2.build_opener()
    page = opener.open(url)
    results = chunk_read(page, report_hook=chunk_report)
    S = StringIO.StringIO(results)
    try:
        fitsfile = fits.open(S, ignore_missing_end=True)
    except IOError:
        S.seek(0)
        G = gzip.GzipFile(fileobj=S)
        fitsfile = fits.open(G, ignore_missing_end=True)
    fitsfile.writeto(outfile, clobber=overwrite)
def CBDownload(env, target, url):
    try:
        import urllib.request as urllib  # Python 3
    except ImportError:
        import urllib2 as urllib  # Python 2

    sys.stdout.write('Downloading ' + url + '.')
    sys.stdout.flush()

    ftp_proxy = os.getenv('ftp_proxy', None)
    http_proxy = os.getenv('http_proxy', None)

    if ftp_proxy or http_proxy:
        handlers = {}
        if ftp_proxy:
            handlers['ftp'] = ftp_proxy
        if http_proxy:
            handlers['http'] = http_proxy

        opener = urllib.build_opener(urllib.ProxyHandler(handlers))
        urllib.install_opener(opener)

    f = None
    stream = None
    try:
        stream = urllib.urlopen(url)
        f = open(target, 'wb', 0)  # Unbuffered

        while stream and f:
            data = stream.read(1024 * 1024)
            if not data:
                break
            f.write(data)
            sys.stdout.write('.')
            sys.stdout.flush()

        sys.stdout.write('ok\n')
        sys.stdout.flush()
    finally:
        if f is not None:
            f.close()
        if stream is not None:
            stream.close()
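# Usage sketch (assumption): the env argument is not used by the body, so a plain
# call works outside SCons; the archive name and URL are placeholders.
CBDownload(None, 'example.tar.gz', 'http://example.com/example.tar.gz')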
def curl_get(url, timeout=5, proxy=False, headers=None, gzip=False):
    """
    Shared fetch helper used by:
    wowtoken.py dd373.py crawler_515fa.py crawler_amac.py crawler_for_some_site.py
    """
    if headers is None:
        headers = {}
    opener = urllib.request.build_opener()
    if proxy:
        proxy_info = {'host': '127.0.0.1', 'port': 7890}
        proxy_support = urllib.request.ProxyHandler(
            {"http": "http://%(host)s:%(port)d" % proxy_info})
        opener = urllib.request.build_opener(proxy_support)
    request = urllib.request.Request(url, headers=headers)
    resp = opener.open(request, timeout=timeout)
    resp_html = resp.read()
    if gzip:
        resp_html = zlib.decompress(resp_html, 16 + zlib.MAX_WBITS)
    return resp_html
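# Usage sketch (assumption): fetch a page with a browser-like User-Agent; set
# gzip=True only when the server is known to return a gzip-compressed body.
html = curl_get('http://example.com/', timeout=10,
                headers={'User-Agent': 'Mozilla/5.0'})
print(html[:100])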
def run(self):
    QgsMessageLog.logMessage(
        'Started task "{}"'.format(self.description()), MESSAGE_CATEGORY,
        Qgis.Info)
    if self.proxyHost is not None and self.proxyPort is not None:
        QgsMessageLog.logMessage('Proxy? ' + str(self.proxyHost),
                                 MESSAGE_CATEGORY, Qgis.Info)
        proxy = urllib.request.ProxyHandler({'http': self.proxyHost})
        opener = urllib.request.build_opener(proxy)
        urllib.request.install_opener(opener)
    sparql = SPARQLWrapper(
        self.triplestoreurl,
        agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    )
    print(str(self.query))
    sparql.setQuery(self.query)
    print("now sending query")
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    for result in results["results"]["bindings"]:
        self.viewlist.append(str(result[self.queryvar]["value"]))
    print(self.viewlist)
    # self.layercount.setText("["+str(len(viewlist))+"]")
    if self.getlabels and "classlabelquery" in self.triplestoreconf and \
            self.triplestoreconf["classlabelquery"] != "":
        labels = self.getLabelsForClasses(
            self.viewlist, self.triplestoreconf["classlabelquery"])
        print(labels)
        self.amountoflabels = len(labels)
        i = 0
        sorted_labels = sorted(labels.items(), key=lambda x: x[1])
        for lab in sorted_labels:
            self.resultlist.append(labels[lab[0]] + " (" + lab[0] + ")")
            i = i + 1
    return True
def _set_cookie(self, fileName):
    cookie = http.cookiejar.MozillaCookieJar()
    cookie.load(fileName, ignore_discard=True, ignore_expires=True)
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie))
    urllib.request.install_opener(opener)
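# Companion sketch (assumption, not from the original source): one way the cookie
# file loaded above could have been produced - capture cookies from a live session
# and save them in Mozilla/Netscape format.
import http.cookiejar
import urllib.request

jar = http.cookiejar.MozillaCookieJar('cookies.txt')
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
opener.open('http://example.com/')
jar.save(ignore_discard=True, ignore_expires=True)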
# -*- coding:utf-8 -*-
import http.cookiejar
import urllib.request
import bs4
import re
from bs4 import BeautifulSoup

cookie = http.cookiejar.MozillaCookieJar()  # CookieJar instance that holds the saved cookies
cookie.load('cookies_csddn.txt', ignore_discard=False, ignore_expires=False)
# use HTTPCookieProcessor to create a cookie-aware handler
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'  # header that fakes the request identity
headers = {'User-Agent': user_agent, 'Referer': 'http://my.csdn.net/'}
url = "http://my.csdn.net/"
rq_body = None  # no request body, so the request stays a GET
req = urllib.request.Request(url, rq_body, headers)
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie))
response = opener.open(req)
print(response.read())
def refresh(self):
    """
    Clear the existing cookies, re-apply the search options, and generate a new cookie
    """
    cookie = cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie))
    # Step 1: the cookie jar records ASP.NET_SessionId, LID, SID_kns
    try:
        opener.open("http://epub.cnki.net/kns/brief/default_result.aspx")
    except:
        print("EXCEPTION(" + time.strftime("%Y-%m-%d %H:%M:%S") +
              "): exception while refreshing the cookie, retrying after " +
              str(self.config.get("urlopenExceptRetryInterval")) + " seconds")
        time.sleep(self.config.get("urlopenExceptRetryInterval"))
        opener.open("http://epub.cnki.net/kns/brief/default_result.aspx")
    # Step 2: log in
    try:
        opener.open("http://epub.cnki.net/kns/Request/login.aspx?&td=" +
                    str(int(time.time() * 1000)))
    except:
        print("EXCEPTION(" + time.strftime("%Y-%m-%d %H:%M:%S") +
              "): exception while refreshing the cookie, retrying after " +
              str(self.config.get("urlopenExceptRetryInterval")) + " seconds")
        time.sleep(self.config.get("urlopenExceptRetryInterval"))
        opener.open("http://epub.cnki.net/kns/Request/login.aspx?&td=" +
                    str(int(time.time() * 1000)))
    # Step 3: set the search options
    data = urllib.parse.urlencode(self.config.get('search')).encode('utf-8')
    self.config.set("Cookie", self.generateCookieString(cookie), "headers")
    headers = self.config.get("headers")
    request = urllib.request.Request(
        "http://epub.cnki.net/KNS/request/SearchHandler.ashx", data, headers)
    try:
        opener.open(request)
    except:
        print("EXCEPTION(" + time.strftime("%Y-%m-%d %H:%M:%S") +
              "): exception while refreshing the cookie, retrying after " +
              str(self.config.get("urlopenExceptRetryInterval")) + " seconds")
        time.sleep(self.config.get("urlopenExceptRetryInterval"))
        opener.open(request)
    additional = {
        "RsPerPage": self.config.get("RecordsPerPage", "list"),
        "cnkiUserKey": self.generateCnkiUserKey()
    }
    self.cookie = self.generateCookieString(cookie, additional)
    # Step 4: request page 1 of the result list and set the retrieval parameters
    data = urllib.parse.urlencode(self.config.get('listPageOne')).encode('utf-8')
    self.config.set("Cookie", self.cookie, "headers")
    headers = self.config.get("headers")
    request = urllib.request.Request("http://epub.cnki.net/kns/brief/brief.aspx",
                                     data, headers)
    try:
        response = opener.open(request)
    except:
        print("EXCEPTION(" + time.strftime("%Y-%m-%d %H:%M:%S") +
              "): exception while refreshing the cookie, retrying after " +
              str(self.config.get("urlopenExceptRetryInterval")) + " seconds")
        time.sleep(self.config.get("urlopenExceptRetryInterval"))
        response = opener.open(request)
    # get the total number of result pages
    soup = BeautifulSoup(response.read())
    if soup.find('span', {"class": "countPageMark"}):
        s = soup.find('span', {"class": "countPageMark"}).get_text()
        s = s.split("/")
        if len(s) >= 2:
            self.totalListPage = int(s[1])
    return self.cookie
def main():
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookielib.CookieJar()))
    urllib.request.install_opener(opener)

    if getUUID() == False:
        print('Failed to get uuid')
        return

    showQRImage()
    time.sleep(1)

    while waitForLogin() != '200':
        pass

    os.remove(QRImagePath)

    if login() == False:
        print('Login failed')
        return

    if webwxinit() == False:
        print('Initialization failed')
        return

    MemberList = webwxgetcontact()

    MemberCount = len(MemberList)
    print('The address book contains %s friends' % MemberCount)

    ChatRoomName = ''
    result = []
    for i in range(0, int(math.ceil(MemberCount / float(MAX_GROUP_NUM)))):
        UserNames = []
        NickNames = []
        DeletedList = ''
        for j in range(0, MAX_GROUP_NUM):
            if i * MAX_GROUP_NUM + j >= MemberCount:
                break
            Member = MemberList[i * MAX_GROUP_NUM + j]
            UserNames.append(Member['UserName'])
            NickNames.append(Member['NickName'].encode('utf-8'))

        print('Group %s...' % (i + 1))
        print(', '.join(NickNames))
        print('Press Enter to continue...')
        input('say something:')

        # create the group chat / add members
        if ChatRoomName == '':
            (ChatRoomName, DeletedList) = createChatroom(UserNames)
        else:
            DeletedList = addMember(ChatRoomName, UserNames)

        DeletedCount = len(DeletedList)
        if DeletedCount > 0:
            result += DeletedList
            print('Found %s friends who deleted you' % DeletedCount)

        # raw_input()

        # remove the members again
        deleteMember(ChatRoomName, UserNames)

    # todo: delete the group chat

    resultNames = []
    for Member in MemberList:
        if Member['UserName'] in result:
            NickName = Member['NickName']
            if Member['RemarkName'] != '':
                NickName += '(%s)' % Member['RemarkName']
            resultNames.append(NickName.encode('utf-8'))

    print('---------- Friends who deleted you ----------')
    print('\n'.join(resultNames))
    print('-----------------------------------')
def run(self):
    QgsMessageLog.logMessage(
        'Started task "{}"'.format(self.description()), MESSAGE_CATEGORY,
        Qgis.Info)
    if self.proxyHost is not None and self.proxyHost != "" and self.proxyPort is not None and self.proxyPort != "":
        QgsMessageLog.logMessage('Proxy? ' + str(self.proxyHost),
                                 MESSAGE_CATEGORY, Qgis.Info)
        proxy = urllib.request.ProxyHandler({'http': self.proxyHost})
        opener = urllib.request.build_opener(proxy)
        urllib.request.install_opener(opener)
    # msgBox=QMessageBox()
    # msgBox.setText(self.query+" - "+self.triplestoreconf[self.tripleStoreEdit.currentIndex()+1]["endpoint"])
    # msgBox.exec()
    if self.findProperty.isChecked():
        if "propertyfromlabelquery" in self.triplestoreconf[
                self.tripleStoreEdit.currentIndex() + 1]:
            self.query = self.triplestoreconf[
                self.tripleStoreEdit.currentIndex() +
                1]["propertyfromlabelquery"].replace("%%label%%", self.label)
    else:
        if "classfromlabelquery" in self.triplestoreconf[
                self.tripleStoreEdit.currentIndex() + 1]:
            self.query = self.triplestoreconf[
                self.tripleStoreEdit.currentIndex() +
                1]["classfromlabelquery"].replace("%%label%%", self.label)
    if self.query == "":
        return
    if "SELECT" in self.query:
        self.query = self.query.replace("%%label%%", self.label).replace(
            "%%language%%", self.language)
        sparql = SPARQLWrapper(
            self.triplestoreconf[self.tripleStoreEdit.currentIndex() + 1]["endpoint"],
            agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
        )
        sparql.setQuery(self.prefixes[self.tripleStoreEdit.currentIndex() + 1] +
                        self.query)
        sparql.setReturnFormat(JSON)
        self.results = sparql.query().convert()
        # msgBox=QMessageBox()
        # msgBox.setText(str(results))
        # msgBox.exec()
        for res in self.results["results"]["bindings"]:
            item = QListWidgetItem()
            item.setData(1, str(res["class"]["value"]))
            if "label" in res:
                item.setText(
                    str(res["label"]["value"] + " (" + res["class"]["value"] + ")"))
            else:
                item.setText(str(res["class"]["value"]))
            self.searchResult.addItem(item)
    else:
        myResponse = json.loads(requests.get(self.query).text)
        self.qids = []
        for ent in myResponse["search"]:
            qid = ent["concepturi"]
            if "http://www.wikidata.org/entity/" in qid and self.findProperty.isChecked():
                qid = "http://www.wikidata.org/prop/direct/" + ent["id"]
            elif "http://www.wikidata.org/wiki/" in qid and self.findConcept.isChecked():
                qid = "http://www.wikidata.org/entity/" + ent["id"]
            self.qids.append(qid)
            label = ent["label"] + " (" + ent["id"] + ") "
            if "description" in ent:
                label += "[" + ent["description"] + "]"
            self.results[qid] = label
import urllib.request
import urllib.parse
import time

username = "******"
password = "******"

def send_to_twitter(msg):
    password_manager = urllib.request.HTTPPasswordMgr()
    password_manager.add_password("Twitter API",
                                  "http://twitter.com/statuses",
                                  username, password)
    http_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    page_opener = urllib.request.build_opener(http_handler)
    urllib.request.install_opener(page_opener)
    params = urllib.parse.urlencode({'status': msg}).encode('utf-8')
    resp = urllib.request.urlopen("http://twitter.com/statuses/update.json", params)
    resp.read()

def get_price():
    page = urllib.request.urlopen("http://")
    text = page.read().decode("utf8")
    where = text.find('>$')
    start_of_price = where + 2
    end_of_price = start_of_price + 4
    return text[start_of_price:end_of_price]

get_price()
price_now = input("Do you need a price right now? (Y/N)")
if price_now == "Y":
    send_to_twitter(get_price())
else:
    price = 99.99
def run(self):
    QgsMessageLog.logMessage(
        'Started task "{}"'.format(self.description()), MESSAGE_CATEGORY,
        Qgis.Info)
    if self.searchTerm == "":
        return False
    concept = "<" + self.searchTerm + ">"
    if self.proxyHost is not None and self.proxyPort is not None:
        QgsMessageLog.logMessage('Proxy? ' + str(self.proxyHost),
                                 MESSAGE_CATEGORY, Qgis.Info)
        proxy = urllib.request.ProxyHandler({'http': self.proxyHost})
        opener = urllib.request.build_opener(proxy)
        urllib.request.install_opener(opener)
    sparql = SPARQLWrapper(
        self.triplestoreurl,
        agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    )
    sparql.setQuery("".join(self.prefixes) + self.query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    self.searchResult.clear()
    if len(results["results"]["bindings"]) == 0:
        return False
    maxcons = int(results["results"]["bindings"][0]["countcon"]["value"])
    attlist = {}
    self.urilist = {}
    for result in results["results"]["bindings"]:
        attlist[result["rel"]["value"][result["rel"]["value"].rfind('/') + 1:]] = round(
            (int(result["countrel"]["value"]) / maxcons) * 100, 2)
        self.urilist[result["rel"]["value"][result["rel"]["value"].rfind('/') + 1:]] = \
            result["rel"]["value"]
    self.sortedatt = sorted(attlist.items(), reverse=True, key=lambda kv: kv[1])
    self.labels = {}
    postdata = {}
    postdata["language"] = "en"
    postdata["format"] = "json"
    postdata["action"] = "wbgetentities"
    atts = [""]
    attcounter = 0
    count = 0
    for att in attlist.keys():
        if att.startswith("P") and count < 50:
            atts[attcounter] += att + "|"
            count += 1
    atts[0] = atts[0][:-1]
    i = 0
    for att in atts:
        url = "https://www.wikidata.org/w/api.php"
        # ?action=wbgetentities&format=json&language=en&ids="+atts
        postdata["ids"] = att
        # msgBox=QMessageBox()
        # msgBox.setText(str(postdata))
        # msgBox.exec()
        myResponse = json.loads(requests.post(url, postdata).text)
        # msgBox=QMessageBox()
        # msgBox.setText(str(myResponse))
        # msgBox.exec()
        for ent in myResponse["entities"]:
            print(ent)
            if "en" in myResponse["entities"][ent]["labels"]:
                self.labels[ent] = myResponse["entities"][ent]["labels"]["en"]["value"]
        i = i + 1
    return True
def __init__(self, wx_app_id, wx_mch_id, wx_mch_key, wx_notify_url):
    self.opener = urllib.request.build_opener(urllib.request.HTTPSHandler())
    self.WX_APP_ID = wx_app_id
    self.WX_MCH_ID = wx_mch_id
    self.WX_MCH_KEY = wx_mch_key
    self.WX_NOTIFY_URL = wx_notify_url
def OP_RETURN_bitcoin_cmd(command, testnet, *args):  # more params are read from here
    if OP_RETURN_BITCOIN_USE_CMD:
        sub_args = [OP_RETURN_BITCOIN_PATH]
        if testnet:
            sub_args.append('-testnet')
        sub_args.append(command)
        for arg in args:
            sub_args.append(
                json.dumps(arg) if isinstance(arg, (dict, list, tuple)) else str(arg))
        raw_result = subprocess.check_output(sub_args).decode("utf-8").rstrip("\n")
        try:  # decode JSON if possible
            result = json.loads(raw_result)
        except ValueError:
            result = raw_result
    else:
        request = {
            'id': str(time.time()) + '-' + str(random.randint(100000, 999999)),
            'method': command,
            'params': args,
        }
        port = OP_RETURN_BITCOIN_PORT
        user = OP_RETURN_BITCOIN_USER
        password = OP_RETURN_BITCOIN_PASSWORD
        if not (len(port) and len(user) and len(password)):
            conf_lines = open(os.path.expanduser('~') + '/.zen/zen.conf').readlines()
            for conf_line in conf_lines:
                parts = conf_line.strip().split('=', 1)  # up to 2 parts
                if (parts[0] == 'rpcport') and not len(port):
                    port = int(parts[1])
                if (parts[0] == 'rpcuser') and not len(user):
                    user = parts[1]
                if (parts[0] == 'rpcpassword') and not len(password):
                    password = parts[1]
        if not len(port):
            port = 18231 if testnet else 8231
        if not (len(user) and len(password)):
            return None  # no point trying in this case
        url = 'http://' + OP_RETURN_BITCOIN_IP + ':' + str(port) + '/'
        try:  # Python 2
            from urllib2 import HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, build_opener, install_opener, urlopen
        except ImportError:  # Python 3
            from urllib.request import HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, build_opener, install_opener, urlopen
        passman = HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, url, user, password)
        auth_handler = HTTPBasicAuthHandler(passman)
        opener = build_opener(auth_handler)
        install_opener(opener)
        raw_result = urlopen(url,
                             json.dumps(request).encode('utf-8'),
                             OP_RETURN_NET_TIMEOUT).read()
        result_array = json.loads(raw_result.decode('utf-8'))
        result = result_array['result']
    return result
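# Usage sketch (assumption): any JSON-RPC method name can be passed through; here
# the standard getblockcount call on mainnet. Extra positional args become RPC params.
block_height = OP_RETURN_bitcoin_cmd('getblockcount', False)
print(block_height)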
#
# html=urlopen("http://pythonscraping.com/pages/page1.html")
# bsobj=BeautifulSoup(html.read())
# print(bsobj.h1)
# try:
#     html=urlopen("http://www.pythonscraping.com/exercises/se1.html")
# except HTTPError as e:
#     print(e)
# else:
#     print("OK")

login_page = "https://passport.csdn.net/account/login"
cj = http.cookiejar.LWPCookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj),
                                     urllib.request.HTTPHandler)
urllib.request.install_opener(opener)
h = urllib.request.urlopen(login_page)
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1',
           'Referer': '******'}
postData = {'op': 'dmlogin',
            'f': 'st',
            'user': 'huisnotwu',         # your username
            'pass': '',                  # your password; it may be sent in plain text or encrypted - if encrypted, apply the matching encryption first
            'rmbr': 'true',              # site-specific field, may differ between sites
            'tmp': '0.7306424454308195'  # site-specific field, may differ between sites
            }
def build_opener(access_token=ACCESS_TOKEN):
    opener = urllib.request.build_opener()
    opener.addheaders = [("Authorization", "Bearer " + access_token)]
    return opener
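# Usage sketch (assumption): ACCESS_TOKEN and the endpoint URL are placeholders;
# every request made through this opener carries the Bearer header.
api = build_opener()
data = api.open('https://api.example.com/v1/me').read()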
def download():
    """Download weather data from weather.gov.sg"""
    logger.info('Downloading weather data from weather.gov.sg')

    # setup for different python versions
    if sys.version_info < (3, 0):
        try:
            os.makedirs(DIRECTORY)
        except OSError as e:
            pass
        import urllib2 as r
        from urllib2 import URLError, HTTPError
    else:
        os.makedirs(DIRECTORY, exist_ok=True)
        import urllib.request as r
        from urllib.error import URLError, HTTPError

    # make sure the output folder exists
    base_url = "http://www.weather.gov.sg/files/dailydata/DAILYDATA_"
    out_path = DIRECTORY + "weather_SG/"
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # weather stations that have data from year 2012 onwards and report mean temperatures:
    # http://www.weather.gov.sg/wp-content/uploads/2016/12/Station_Records.pdf
    weather_station_ids = [23, 24, 25, 43, 44, 50, 60, 80, 86, 102, 104, 106,
                           107, 108, 109, 111, 115]

    current_year = int(datetime.today().strftime("%Y"))
    today_ym = int(datetime.today().strftime("%Y%m"))
    today_d = int(datetime.today().strftime("%d"))
    logger.debug('today ym: %d, day %d', today_ym, today_d)

    # add headers by building an opener
    opener = r.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    # loop through each weather station and try to download the csv datafile
    for year in range(2012, current_year + 1):
        y = str(year)
        for month in range(1, 13):
            m = "%02d" % month
            file_ym = int(y + m)
            if ((file_ym == (today_ym - 1)) and (today_d <= 10)):
                break
            elif (file_ym == today_ym):
                break
            for station_id in weather_station_ids:
                ws = 'S' + str(station_id)
                try:
                    # set URL
                    url = base_url + ws + "_" + y + m + ".csv"
                    # set output file name
                    filename = out_path + ws + "_" + y + m + ".csv"
                    # Download the file from `url` and save it locally under `filename`
                    with opener.open(url) as response:
                        if response.getcode() == 200:
                            with open(filename, 'wb') as out_file:
                                data = response.read()  # a `bytes` object
                                out_file.write(data)
                except:
                    # not every station has data for every month, so a 404 error
                    # is expected when the file is not there
                    logger.debug('error, url: %s', url)
                    pass
import urllib2
import cookielib

# create a CookieJar instance to hold the cookies
cookie = cookielib.CookieJar()
# use urllib2's HTTPCookieProcessor to create a cookie handler
handler = urllib2.HTTPCookieProcessor(cookie)
# build an opener from the handler
opener = urllib2.build_opener(handler)
# opener.open works like urllib2.urlopen and also accepts a Request object
response = opener.open('http://www.baidu.com')
for item in cookie:
    print('Name = ' + item.name)
    print('Value = ' + item.value)

# reuse the cookie to request another URL
gradeUrl = 'http://www.baidu.com/xxx/xx'
# make the request
result = opener.open(gradeUrl)
print(result.read())
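# Follow-on sketch (assumption, not part of the original tutorial): the same flow
# with a MozillaCookieJar so the received cookies survive between runs.
filename = 'cookie.txt'
mcj = cookielib.MozillaCookieJar(filename)
opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(mcj))
opener2.open('http://www.baidu.com')
mcj.save(ignore_discard=True, ignore_expires=True)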
def getimage(cls, imagename):
    """
    Downloads the requested image
    @return: path of the image, locally
    """
    logger.info("UtilsTest.getimage('%s')" % imagename)
    fullimagename = os.path.join(cls.image_home, imagename)
    if not os.path.isfile(fullimagename):
        logger.info("Trying to download image %s, timeout set to %ss" %
                    (imagename, cls.timeout))
        if "http_proxy" in os.environ:
            dictProxies = {'http': os.environ["http_proxy"]}
            proxy_handler = urllib2.ProxyHandler(dictProxies)
            opener = urllib2.build_opener(proxy_handler).open
        else:
            opener = urllib2.urlopen

        # Note: since Python 2.6 urllib2 supports a timeout argument
        timer = threading.Timer(cls.timeout + 1, cls.timeoutDuringDownload)
        timer.start()
        logger.info("wget %s/%s" % (cls.url_base, imagename))
        if sys.version_info > (2, 6):
            data = opener("%s/%s" % (cls.url_base, imagename),
                          data=None, timeout=cls.timeout).read()
        else:
            data = opener("%s/%s" % (cls.url_base, imagename),
                          data=None).read()
        timer.cancel()
        logger.info("Image %s successfully downloaded." % imagename)

        try:
            open(fullimagename, "wb").write(data)
        except IOError:
            raise IOError("unable to write downloaded data to disk at %s" %
                          cls.image_home)

        if not os.path.isfile(fullimagename):
            raise RuntimeError("Could not automatically download test images %s!\n"
                               "If you are behind a firewall, please set the "
                               "environment variable http_proxy.\n"
                               "Otherwise please try to download the images manually from \n"
                               "%s" % (cls.url_base, imagename))

        if imagename.endswith(".bz2"):
            decompressed = bz2.decompress(data)
            basename = fullimagename[:-4]
        elif imagename.endswith(".gz"):
            decompressed = gzip.open(fullimagename).read()
            basename = fullimagename[:-3]
        else:
            decompressed = data
            basename = fullimagename

        gzipname = basename + ".gz"
        bzip2name = basename + ".bz2"

        if basename != fullimagename:
            try:
                open(basename, "wb").write(decompressed)
            except IOError:
                raise IOError("unable to write decompressed data to disk at %s" %
                              cls.image_home)
        if gzipname != fullimagename:
            try:
                gzip.open(gzipname, "wb").write(decompressed)
            except IOError:
                raise IOError("unable to write gzipped data to disk at %s" %
                              cls.image_home)
        if bzip2name != fullimagename:
            try:
                bz2.BZ2File(bzip2name, "wb").write(decompressed)
            except IOError:
                raise IOError("unable to write bzipped2 data to disk at %s" %
                              cls.image_home)
    return fullimagename
import time
import urllib2
import re
import cookielib
from cookielib import CookieJar
import datetime

cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/5.0')]

def main():
    try:
        page = 'http://www.huffingtonpost.com/feeds/index.xml'
        sourceCode = opener.open(page).read()
        # print sourceCode
        try:
            titles = re.findall(r'<title>(.*?)</title>', sourceCode)
            links = re.findall(r'<link>(.*?)</link>', sourceCode)
            for title in titles:
                print title
            for link in links:
                print link
        except Exception as e:
            print str(e)
    except Exception as e:
        print str(e)
        pass
# -*- coding: utf8 -*-
# File Name: dump_info_auth.py
# Author: bill_law6
# mail: [email protected]
# Created Time: Thu 19 Jan 2017 04:49:15 PM CST
# Description:

import sys, urllib, getpass
import urllib.request

class TerminalPassword(urllib.request.HTTPPasswordMgr):
    def find_user_password(self, realm, authuri):
        retval = urllib.request.HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if retval[0] is None and retval[1] is None:
            sys.stdout.write("Login required for %s at %s" % (realm, authuri))
            sys.stdout.write("Username: ")
            username = sys.stdin.readline().rstrip()
            password = getpass.getpass().rstrip()
            return (username, password)
        else:
            return retval

req = urllib.request.Request(sys.argv[1])
opener = urllib.request.build_opener(
    urllib.request.HTTPBasicAuthHandler(TerminalPassword()))
fd = opener.open(req)
import urllib.request
import ntlm_auth
import HTTPNtlmAuthHandler
from sharepoint import SharePointSite

username = '******'
password = '******'
url = 'http://cyg249:8080/tfs/DefaultCollection/_apis/wit/workitems/1'

passman = urllib.request.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, username, password)
auth_NTLM = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(passman)
opener = urllib.request.build_opener(auth_NTLM)

site = SharePointSite(url, opener)
for sp_list in site.lists:
    print(sp_list.id, sp_list.meta['Title'])
# Join the method, the URI-encoded URL, and the parameter string built above with &
message = method + '&' + urllib.parse.quote(url, '') + '&' + urllib.parse.quote(oauth_str, '')
# Create the HMAC of message using the key
signature = hmac.new((consumer_secret + '&' + oauth_token_secret).encode('utf8'),
                     message.encode('utf8'), hashlib.sha1).digest()
if sys.version_info[0] < 3:
    oauth['oauth_signature'] = signature.encode('base64').strip()
else:
    oauth['oauth_signature'] = base64.b64encode(signature)

# Sort the parameters, URI-encode them as key=value pairs, and join them with commas
header_oauth_str = ','.join(urllib.parse.quote(k, '') + '=' + urllib.parse.quote(oauth[k], '~')
                            for k in sorted(oauth))
# the signature must be discarded so the oauth object can be reused. Fixed on Jul 5
del oauth['oauth_signature']

# Add "Authorization: OAuth" plus the string built above to the request headers
opener = urllib.request.build_opener()
opener.addheaders = [('Authorization', 'OAuth ' + header_oauth_str)]

# Then call the API as usual
if method == 'POST':
    tweets = json.loads(
        opener.open(url, urllib.parse.urlencode(params).encode('utf8')).read().decode('utf8'))
else:
    tweets = json.loads(
        opener.open(url + '?' + urllib.parse.urlencode(params)).read().decode('utf8'))

# --- cite end, my own scraping code follows ---
if url == 'https://api.twitter.com/1.1/search/tweets.json':
    tweets = tweets['statuses']
print('url,tweet_count')
urls = []
for tweet in tweets:
    if tweet['entities'] and tweet['entities']['urls']:
        urls += [e['expanded_url'] for e in tweet['entities']['urls']]
def run(self):
    QgsMessageLog.logMessage('Started task "{}"'.format(
        self.description()), MESSAGE_CATEGORY, Qgis.Info)
    if self.proxyHost is not None and self.proxyPort is not None:
        QgsMessageLog.logMessage('Proxy? ' + str(self.proxyHost), MESSAGE_CATEGORY, Qgis.Info)
        proxy = urllib.request.ProxyHandler({'http': self.proxyHost})
        opener = urllib.request.build_opener(proxy)
        urllib.request.install_opener(opener)
    attlist = {}
    attlist[self.item] = []
    attlist[self.idfield] = {}
    for f in self.layer.getFeatures():
        if self.item in f:
            attlist[self.item].append(f[self.item])
        attlist[self.idfield][f[self.idfield]] = True
    query = ""
    if self.content == "Enrich URI":
        query += "SELECT ?item WHERE {\n"
    elif self.content == "Enrich Value" or self.content == "Enrich Both":
        query += "SELECT ?item ?val ?valLabel ?vals WHERE {\n"
    query += "VALUES ?vals { "
    print(attlist)
    for it in attlist[self.idfield]:
        if str(it).startswith("http"):
            query += "<" + str(it) + "> "
        elif self.idprop == "http://www.w3.org/2000/01/rdf-schema#label" and self.language != None and self.language != "":
            query += "\"" + str(it) + "\"@" + self.language + " "
        else:
            query += "\"" + str(it) + "\" "
    query += " } . \n"
    proppp = self.propertyy.data(1)
    if self.propertyy.data(1).startswith("//"):
        proppp = "http:" + proppp
    if self.table.item(self.row, 7).text() != "" and "wikidata" in self.triplestoreurl:
        query += "?item wdt:P31 <" + self.table.item(self.row, 7).text() + "> .\n"
    else:
        query += "?item rdf:type <" + self.table.item(self.row, 7).text() + "> .\n"
    query += "?item <" + self.idprop + "> ?vals .\n"
    query += "?item <" + proppp + "> ?val . \n"
    if (self.content == "Enrich Value" or self.content == "Enrich Both") and not "wikidata" in self.triplestoreurl:
        query += "OPTIONAL{ ?val rdfs:label ?valLabel }"
    elif (self.content == "Enrich Value" or self.content == "Enrich Both") and "wikidata" in self.triplestoreurl:
        query += "SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE]," + self.language + "\". }\n"
    query += "} "
    QgsMessageLog.logMessage("proppp: " + str(proppp), MESSAGE_CATEGORY, Qgis.Info)
    QgsMessageLog.logMessage("idprop: " + self.idprop, MESSAGE_CATEGORY, Qgis.Info)
    QgsMessageLog.logMessage(query, MESSAGE_CATEGORY, Qgis.Info)
    QgsMessageLog.logMessage(self.triplestoreurl, MESSAGE_CATEGORY, Qgis.Info)
    print(self.triplestoreurl)
    try:
        sparql = SPARQLWrapper(self.triplestoreurl, agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
        sparql.setQuery(query)
        sparql.setMethod(POST)
        print("now sending query")
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()
    except Exception as e:
        QgsMessageLog.logMessage("Trying GET query", MESSAGE_CATEGORY, Qgis.Info)
        try:
            sparql = SPARQLWrapper(self.triplestoreurl, agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
            sparql.setQuery(query)
            sparql.setMethod(GET)
            sparql.setReturnFormat(JSON)
            results = sparql.query().convert()
        except Exception as e:
            # msgBox=QMessageBox()
            # msgBox.setText("The following exception occurred: "+str(e))
            # msgBox.exec()
            return False
    print(str(results))
    # resultcounter=0
    for resultcounter in results["results"]["bindings"]:
        if self.content == "Enrich Value":
            self.resultmap[resultcounter["vals"]["value"]] = resultcounter["valLabel"]["value"]
        elif self.content == "Enrich URI":
            self.resultmap[resultcounter["vals"]["value"]] = resultcounter["val"]["value"]
        else:
            self.resultmap[resultcounter["vals"]["value"]] = \
                resultcounter["valLabel"]["value"] + ";" + resultcounter["val"]["value"]
    self.columntype = self.detectColumnType(self.resultmap, self.table)
    QgsMessageLog.logMessage(str(self.columntype), MESSAGE_CATEGORY, Qgis.Info)
    QgsMessageLog.logMessage(str(self.resultmap), MESSAGE_CATEGORY, Qgis.Info)
    return True
import urllib.parse
import urllib.request
import http.cookiejar
import string
import re

hosturl = 'https://os.ncuos.com/api/user/token'
posturl = 'https://os.ncuos.com/api/user/token'

cj = http.cookiejar.CookieJar()
cookie_support = urllib.request.HTTPCookieProcessor(cj)
opener = urllib.request.build_opener(cookie_support, urllib.request.HTTPHandler)
urllib.request.install_opener(opener)

h = urllib.request.urlopen(hosturl)

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
           'Referer': 'https://os.ncuos.com/api/user/token'}

postData = {'op': 'dmlogin',
            'f': 'st',
            'user': '8002118162',
            'pass': '200617',
            'rmbr': 'ture',
            'tmp': '0.0008502006530761719s'
            }

postData = urllib.parse.urlencode(postData).encode('utf-8')
request = urllib.request.Request(posturl, postData, headers)