def getHtml(url, charset=None):
    if charset is None:
        return getHtmlDefault(url)
    response = getResponse(url)
    if response is None:
        return None
    if response.headers.get('Content-Encoding') == 'gzip':
        # decompress the gzip-encoded body
        page = decompress(response.read())
    else:
        page = response.read()
    return page.decode(charset, 'ignore')
def get_sub_monitor_bill(self, main_sn):
    """Get feedback subSn by mainSn."""
    sub_url = ("http://%s/ida30/jsp/svr/billdeal/common/feedback.jsp?"
               "flag=S&recordSn=&mainSn=%s" % (self.host, main_sn))
    record_sn = ""
    sub_sn = ""
    try:
        response = urllib.request.urlopen(sub_url)
        content = str(response.read())
        i = 0
        for m in self.members:
            i = content.find(m)
            if i > 0:
                break
        if i > 0:
            # extract recordSn from the nearest preceding value="..." attribute
            i = content.rfind('value="', 0, i) + 7
            e = content.find('"', i)
            record_sn = content[i:e]
            # extract subSn from the following ">...<" text node
            i = content.find('">', e) + 2
            e = content.find('<', i)
            sub_sn = content[i:e]
        return record_sn + "$" + sub_sn
    except (URLError, HTTPError):
        self.write_log(("Error: Access SubSn URL fail.", main_sn, self.uid),
                       "%s [%s] (%s)")
        return False
def fb_monitor_bill(self, main_sn):
    """Feedback monitor bill automatically."""
    record_sn = self.get_sub_monitor_bill(main_sn)
    if not record_sn:
        return False
    elif record_sn != "$":
        record_sn, main_sn = record_sn.split("$")
        fb_url = ("http://%s/ida30/svr/net/CommonAction.do?"
                  "method=feedback&recordSn=%s&flag=S" % (self.host, record_sn))
    else:
        fb_url = ("http://%s/ida30/svr/net/CommonAction.do?"
                  "method=mainFeedback&mainSn=%s&flag=S" % (self.host, main_sn))
    # urlencode() returns str; a Python 3 POST body must be bytes, so encode it.
    # (The Chinese values are protocol data expected by the server and are kept
    # verbatim: "其他" = "other", "网络运行一切正常。" = "network running normally".)
    data = urlencode({"percent": "0",
                      "procCode": "99",
                      "procCodeText": "其他",
                      "procDesc": "网络运行一切正常。"}).encode('utf-8')
    request = urllib.request.Request(fb_url, data)
    try:
        response = urllib.request.urlopen(request)
        content = str(response.read())
        if content.find('flgSuc = "Y"') > 0:
            self.write_log(("Feedback OK.", main_sn, self.uid), "%s [%s] (%s)")
            return True
        else:
            self.write_log(("Error: Feedback fail.", main_sn, self.uid),
                           "%s [%s] (%s)")
            return False
    except (URLError, HTTPError):
        self.write_log(("Error: Access Feedback URL fail.", main_sn, self.uid),
                       "%s [%s] (%s)")
        return False
def getResult(query):
    url = getUrl(query)
    import urllib.request
    with urllib.request.urlopen(url) as response:
        html = response.read()
    # Decode once; the original decoded, re-encoded to the stdout encoding, and
    # then took str() of the bytes, which only worked because it then split on
    # the literal two-character sequence '\\n' inside the repr.
    html = html.decode('utf-8', errors='ignore')
    i = 0
    result = ''
    for line in html.split('\n'):
        if i > 64:
            if '[' in line:
                break
        if '[23]Image' in line:
            continue
        if 'About' in line and 'results' in line:
            continue
        result = result + '\n' + line
        i = i + 1
    return result
def get_url_content(website):
    headers = {'Accept-Charset': 'utf-8',
               'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'}
    request = urllib.request.Request(website, headers=headers)
    response = urllib.request.urlopen(request)
    html = response.read()
    response.close()
    return html
def get_content(link):
    # Crawl Website Moviebarcodes.tumblr.com/movie-index
    response = urllib.request.urlopen(link)
    str_response = unescape(response.read().decode('utf-8'))
    # debugKH(str_response)  # use input string
    process_file(str_response)
    return str_response
def get_player_information(self, Player=None):
    if Player is None:
        return None
    url = (Consts.URL['base'] + Consts.URL['player'] + Player.id +
           '-' + Player.firstName + '-' + Player.lastName)
    req = urllib.request.Request(url)
    response = urllib.request.urlopen(req)
    response_data = response.read()
    return response_data
def save_url_to_file(url, filename):
    """
    :rtype: Int
    """
    with urllib.request.urlopen(url) as response:
        with open(filename, 'wb') as file:
            if file.write(response.read()):
                return 0
    return 1
def getIssueList():
    """ Return all issues for REPO_ID """
    request = urllib.request.Request(URL + "/api/v3/projects/" + REPO_ID + "/issues",
                                     headers={"PRIVATE-TOKEN": TOKEN})
    context = ssl._create_unverified_context()
    try:
        response = urllib.request.urlopen(request, context=context)
    except HTTPError as e:
        return e.read().decode("utf-8")
    return json.loads(response.read().decode("utf-8"))
def get_personal_info(self, Employee=None):
    if Employee is None:
        return None
    url = Consts.URL['base'] + Consts.URL['employee'] + Employee.id
    req = urllib.request.Request(url)
    response = urllib.request.urlopen(req)
    responseData = response.read()
    return responseData
def get_player_transfer_history(self, Player=None):
    if Player is None:
        return None
    url = (Consts.URL['base'] + Consts.URL['player'] + Player.id +
           '-' + Player.firstName + '-' + Player.lastName +
           Consts.URL['history'])
    req = urllib.request.Request(url)
    response = urllib.request.urlopen(req)
    responseData = response.read()
    return responseData
def _GetAuthToken(self, email, password):
    """Uses ClientLogin to authenticate the user, returning an auth token.

    Args:
      email: The user's email address
      password: The user's password

    Raises:
      ClientLoginError: If there was an error authenticating with ClientLogin.
      HTTPError: If there was some other form of HTTP error.

    Returns:
      The authentication token returned by ClientLogin.
    """
    account_type = self.account_type
    if not account_type:
        if (self.host.split(':')[0].endswith(".google.com")
                or (self.host_override
                    and self.host_override.split(':')[0].endswith(".google.com"))):
            account_type = "HOSTED_OR_GOOGLE"
        else:
            account_type = "GOOGLE"
    data = {
        "Email": email,
        "Passwd": password,
        "service": "ah",
        "source": self.source,
        "accountType": account_type
    }
    req = self._CreateRequest(
        url=("https://%s/accounts/ClientLogin"
             % os.getenv("APPENGINE_AUTH_SERVER", "www.google.com")),
        data=urllib.parse.urlencode(data))
    try:
        response = self.opener.open(req)
        # read() returns bytes in Python 3; decode before splitting on "\n".
        response_body = response.read().decode("utf-8")
        response_dict = dict(x.split("=") for x in response_body.split("\n") if x)
        if os.getenv("APPENGINE_RPC_USE_SID", "0") == "1":
            self.extra_headers["Cookie"] = ('SID=%s; Path=/;' % response_dict["SID"])
        return response_dict["Auth"]
    except urllib.error.HTTPError as e:
        if e.code == 403:
            body = e.read().decode("utf-8")
            response_dict = dict(x.split("=", 1) for x in body.split("\n") if x)
            raise ClientLoginError(req.get_full_url(), e.code, e.msg,
                                   e.headers, response_dict)
        else:
            raise
def __init__(self, url):
    response = None
    request = urllib.request.Request(url)
    request.add_header("User-Agent", self.user_agent)
    # noinspection PyBroadException
    try:
        response = urllib.request.urlopen(request)
    except:
        print("Error: Invalid URL. Exiting.")
        exit()
    html_content = response.read().decode("utf8")
    self.__parse_content(html_content)
def getHtmlDefault(url):
    response = getResponse(url)
    page = None
    if response is None:
        return None
    if response.headers.get('Content-Encoding') == 'gzip':
        # decompress the gzip-encoded body
        page = decompress(response.read())
    else:
        page = response.read()
    charset = chardet.detect(page)
    tries = 0
    # The original used ``++time``, which in Python is a no-op double unary
    # plus, so the retry counter never advanced; increment it explicitly.
    while charset['encoding'] is None and tries < DETECT_TIME:
        charset = chardet.detect(page)
        tries += 1
    if charset['encoding'] is not None:
        html = page.decode(charset['encoding'], 'ignore')
        return html
    print("can't decode %s" % url)
    return None
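# A minimal usage sketch for getHtml()/getHtmlDefault() above; it assumes the
# helpers they call (getResponse(), decompress()) and the DETECT_TIME constant
# are defined elsewhere in the same module, as the two snippets imply. The URL
# is a placeholder, not from the original code.
if __name__ == "__main__":
    # An explicit charset skips chardet detection; charset=None falls back to it.
    html = getHtml("http://example.com/", charset="utf-8")
    if html is not None:
        print(html[:200])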
def urlget_Test1(msg):
    # NOTE: this early return disables the whole test function; everything
    # below it is dead code kept for reference.
    return
    import urllib.parse
    import urllib.request
    print('urlget_Test1()')
    # url = 'http://www.someserver.com/cgi-bin/register.cgi'
    url = 'http://mycase.in.gov/default.aspx'
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    values = {'name': 'Michael Foord',
              'location': 'Northampton',
              'language': 'Python'}
    headers = {'User-Agent': user_agent}
    print('Header: ' + str(headers))
    data = urllib.parse.urlencode(values)
    # req = urllib.request.Request(url, data, headers)
    # Passing raw header bytes as the *data* argument sends them as a POST
    # body rather than as headers; kept as-is from the original experiment.
    req = urllib.request.Request(url, b'User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
    # req = urllib.request.Request(url, headers)
    # req = urllib.request.Request(url)
    response = urllib.request.urlopen(req)
    the_page = response.read()
    print(the_page)
    exit()
    try:
        response = urllib.request.urlopen(req)
    except urllib.error.HTTPError as e:
        print(e.code)
        print(e.read())
        exit(1)
    else:
        the_page = response.read()
        print(the_page)
        exit()
def download_manifest_as_json():
    """Download and parse the remote manifest

    The full file is downloaded and parsed as JSON; the resulting dict is
    returned.
    """
    manifest_url = os.getenv('FLOORED_MANIFEST_URL',
                             'http://blender.floored.com/blendloft/manifest.json')
    _log.info('manifest url: %s', manifest_url)
    req = urllib.request.Request(manifest_url, headers=_download_headers)
    with urllib.request.urlopen(req) as response:
        text = response.read().decode(encoding='UTF-8')
    return json.loads(text)
def accept_monitor_bill(self, record_sn, main_sn):
    """Accept monitor bill automatically."""
    accept_url = ("http://%s/ida30/svr/net/AcceptAction.do?"
                  "method=acceptBatch&recordSn=%s&mainSns=%s&businessCode=IDB_SVR_NET"
                  % (self.host, record_sn, main_sn))
    try:
        response = urllib.request.urlopen(accept_url)
        content = str(response.read())
        # '异常页面' means "error page"; its absence signals success.
        if content.find('异常页面') < 0:
            self.write_log(("Accept OK.", main_sn, self.uid), "%s [%s] (%s)")
            return True
    except (URLError, HTTPError):
        self.write_log("Error: Access Accept URL fail.", "%s", log_flag=False)
    return False
def request_until_succeed(url):
    req = urllib.request.Request(url)
    success = False
    while success is False:
        try:
            response = urllib.request.urlopen(req)
            if response.getcode() == 200:
                success = True
        except Exception as e:
            print(e)
            time.sleep(5)
            print("Error for URL %s: %s" % (url, datetime.datetime.now()))
    return response.read().decode('utf8')
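# A minimal usage sketch for request_until_succeed() above (the URL is a
# placeholder, not from the original snippet); it assumes the module imports
# urllib.request, time, and datetime as the function body implies. Note the
# function blocks indefinitely, retrying every 5 seconds, until it gets a 200.
page = request_until_succeed("https://example.com/api/resource")
print(len(page))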
def get_personal_search(self, role, page=1, min_age=0, max_age=99):
    if role is None:
        return None
    url = (Consts.URL['base'] + Consts.URL['employee search'] +
           '&speciality=' + role + '&page=' + str(page))
    values = ('&country_id=' + '&job_status=1' + '&age_min=' + str(min_age) +
              '&age_max=' + str(max_age) + '&search=1&commit=S%C3%B8g')
    try:
        response = self.connection.open(url + values)
        responseData = response.read()
    except:
        responseData = '404'
    return responseData
def get_article(self):
    try:
        req = urllib.request.Request(self._myUrl, headers=self._headers)
        print('Obtain response from the server.......')
    except:
        # Without this return, a failed Request construction would leave
        # ``req`` unbound and crash on the urlopen() call below.
        print('Fail to get response.........')
        return None
    response = urllib.request.urlopen(req)
    result = response.read()
    unicodepage = result.decode('gb2312', 'ignore')
    the_time = re.search(r'<title>(.*)</title>', unicodepage, re.DOTALL).group(1)
    s_split = re.split('<div class="field-items">', unicodepage)
    thearticle = s_split[1]
    thearticle = re.sub('<img .*?/>', "", thearticle)
    thearticle = re.split('<div id="footer">', thearticle)[0]
    thearticle = ("<html><head>" + self._css + "</head><body><h1>" + the_time +
                  "</h1>" + thearticle + "</body></html>")
    return thearticle
def get_json(self):
    # Form GET request
    url = 'http://www.omdbapi.com/?'
    values = {'t': self.get_title(), 'y': '', 'plot': 'short', 'r': 'json'}
    data = urllib.parse.urlencode(values)
    full_request = url + data
    # Request to server
    response = urllib.request.urlopen(full_request)
    # Read data in response object and decode
    response = response.read().decode("utf-8")
    # Convert response to dict. ast.literal_eval happens to parse OMDb's
    # output because every value is a quoted string; json.loads would be the
    # robust choice for general JSON (true/false/null are not Python literals).
    response = ast.literal_eval(response)
    return response
def query(query, useragent='python-duckduckgo ' + str(__version__),
          safesearch=True, html=False, meanings=True, **kwargs):
    """
    Query DuckDuckGo, returning a Results object.

    Here's a query that's unlikely to change:

    >>> result = query('1 + 1')
    >>> result.type
    'nothing'
    >>> result.answer.text
    '1 + 1 = 2'
    >>> result.answer.type
    'calc'

    Keyword arguments:
    useragent: UserAgent to use while querying. Default: "python-duckduckgo %d" (str)
    safesearch: True for on, False for off. Default: True (bool)
    html: True to allow HTML in output. Default: False (bool)
    meanings: True to include disambiguations in results (bool)
    Any other keyword arguments are passed directly to DuckDuckGo as URL params.
    """
    safesearch = '1' if safesearch else '-1'
    html = '0' if html else '1'
    meanings = '0' if meanings else '1'
    params = {
        'q': query,
        'o': 'json',
        'kp': safesearch,
        'no_redirect': '1',
        'no_html': html,
        'd': meanings,
    }
    params.update(kwargs)
    encparams = urllib.parse.urlencode(params)
    url = 'http://api.duckduckgo.com/?' + encparams
    request = urllib.request.Request(url, headers={'User-Agent': useragent})
    response = urllib.request.urlopen(request)
    json = j.loads(response.read().decode('utf-8'))
    response.close()
    return Results(json)
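# A minimal usage sketch for query() above, relying only on the attributes its
# own docstring demonstrates (result.type, result.answer.text); it assumes the
# module-level names it uses (__version__, the ``j`` json alias, and the
# Results class) exist as in the original duckduckgo module.
res = query('1 + 1')
print(res.type)         # e.g. 'nothing'
print(res.answer.text)  # instant-answer text, e.g. '1 + 1 = 2'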
def validate_code(self):
    """Get validate code from url."""
    rand_date = "%s" % datetime.now()
    rand_date = rand_date.replace(' ', '%20')
    is_change_color = "N"
    validate_url = ("http://%s/validateCode?randDate=%s&isChangColor=%s"
                    % (self.host, rand_date, is_change_color))
    try:
        response = urllib.request.urlopen(validate_url)
        code = response.read()
    except (URLError, HTTPError):
        self.write_log("Error: Access ValidateCode URL fail.", "%s", log_flag=False)
        return False
    else:
        with open("validate.png", 'wb') as image_file:
            image_file.write(code)
        img = Image.open("validate.png")
        pix = img.load()
        width, height = img.size
        code_str = ''
        for i in range(width):
            for j in range(height):
                for p in pix[i, j]:
                    code_str = code_str + str(p)
        with sqlite3.connect("feedback3.db") as conn:
            conn.text_factory = str
            cu = conn.cursor()
            sql = "select image_code from t_v_code where image_md5='%s'"
            cu.execute(sql % hashlib.md5(code_str.encode()).hexdigest())
            image_code = cu.fetchone()
            if image_code is None:
                return False
            else:
                return image_code[0]
def save_url_to_file_with_auth(url, filename, username, password):
    # create a password manager
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url, username, password)
    handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
    # create "opener" (OpenerDirector instance)
    opener = urllib.request.build_opener(handler)
    # use the opener to fetch a URL
    # (this immediate fetch discards its result and is redundant once the
    # opener is installed below; kept from the original)
    opener.open(url)
    # Install the opener.
    # Now all calls to urllib.request.urlopen use our opener.
    urllib.request.install_opener(opener)
    with urllib.request.urlopen(url) as response:
        with open(filename, 'wb') as file:
            if file.write(response.read()):
                return 0
    return 1
# with open('SampleRequest.json') as json_file:
#     data = json.load(json_file)
f = open("SampleRequest.json", "r")
body = json.load(f)
print(body)
headers = {
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': '-------',
}
params = urllib.parse.urlencode({})
try:
    conn = http.client.HTTPSConnection('westus2.api.cognitive.microsoft.com')
    # json.load() returns a dict; the request body must be serialized back
    # to a JSON string before sending (the original passed the dict directly).
    conn.request("POST", "/text/analytics/v2.1/entities?%s" % params,
                 json.dumps(body), headers)
    response = conn.getresponse()
    data = response.read()
    print(data)
    conn.close()
except Exception as e:
    # print("[Errno {0}] {1}".format(e.errno, e.strerror))
    print("ohno")
def getPage(pageNo):
    url = "http://www.xiumeim.com/albums/page-%s.html" % pageNo
    request = urllib.request.Request(url)
    response = opener.open(request)
    return str(response.read().decode())
# Daum Webtoon > "어쩌다 발견한 7월" (July Found by Chance)
from urllib.request import urlopen
import json

if __name__ == '__main__':
    # response = urlopen("http://webtoon.daum.net/data/pc/webtoon/view/findjuly")
    # response_byte = response.read()
    # response_json = json.loads(response_byte)
    with urlopen("http://webtoon.daum.net/data/pc/webtoon/view/findjuly") as data:
        # Read from the context-manager variable; the original mistakenly
        # called read() on the imported urllib ``response`` module.
        response_byte = data.read()
    response_json = json.loads(response_byte)
    # print(json.loads(response.read()))
    # print(response_json['data']['webtoon']['webtoonEpisodes'][11]['title'])
    cartoon_titles = response_json['data']['webtoon']['webtoonEpisodes']
    for item in cartoon_titles:
        title = item['title']
        thumbnail = item['thumbnailImage']['url']
        print(title)
        print(thumbnail)
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
from urllib import request
import chardet

if __name__ == "__main__":
    response = request.urlopen("http://www.baidu.com")
    html = response.read()
    # Detect the charset on the raw bytes; the original decoded to utf-8 and
    # re-encoded before detecting, which made the result trivially "utf-8".
    print(chardet.detect(html))
import urllib.request
from urllib.parse import urlencode

values = {"username": "******", "password": "******"}
data = urlencode(values)
print(data)
url = "http://quote.eastmoney.com/stocklist.html"
request = urllib.request.Request('%s?%s' % (url, data))
# request = urllib.request.Request(url, data.encode('utf-8'))
response = urllib.request.urlopen(request)
print(response.read().decode('gbk'))
#!/usr/bin/python3
"""displays the value of the X-Request-Id variable found in header of response"""
import sys
import urllib.request
import urllib.error

if __name__ == "__main__":
    try:
        req = urllib.request.Request(sys.argv[1])
        with urllib.request.urlopen(req) as response:
            the_page = response.read().decode('utf-8')
            print(the_page)
    except urllib.error.HTTPError as e:
        # Only HTTPError carries a body and a status code; the original caught
        # the broader URLError, which has neither and would crash on e.read().
        ResponseData = e.read().decode("utf8", 'ignore')
        print('Error code: {}'.format(e.code))
def content(self):
    li = []
    for j in range(274, 275):
        for i in range(j, j + 1):
            url = 'http://yuanjian.cnki.net/Search/Result'
            print('current page', i)
            time.sleep(random.random() * 3)
            formdata = {'Type': 1, 'Order': 1, 'Islegal': 'false',
                        'ArticleType': 1, 'Theme': 'XRD',
                        'searchType': 'MulityTermsSearch',
                        'ParamIsNullOrEmpty': 'true', 'Page': i}
            try:
                # requests.post() already returns the response; the original
                # then called the nonexistent ``requests.post.urlopen`` on it.
                response = requests.post(url=url, data=formdata,
                                         headers=self.headers,
                                         cookies=self.cookies,
                                         params=self.param)
                data = etree.HTML(response.text)
                print(data)
                # list of article links
                url_list = data.xpath("//*[@id='article_result']/div/div/p[1]/a[1]/@href")
                print(url_list)
                # list of keywords
                key_wordlist = []
                all_items = data.xpath("//*[@id='article_result']/div/div")
                print(len(all_items))
                for i in range(1, len(all_items) + 1):
                    key_word = data.xpath("//*[@id='article_result']/div/div[%s]/div[1]/p[1]/a/text()" % i)
                    key_words = ';'.join(key_word)
                    key_wordlist.append(key_words)
                # sources
                source_items = data.xpath("//*[@id='article_result']/div/div")
                for j in range(1, len(source_items) + 1):
                    sources = data.xpath("//*[@id='article_result']/div/div/p[3]/a[1]/span/text()")
                for index, url in enumerate(url_list):
                    items = {}
                    try:
                        print('current link:', url)
                        content = requests.get(url, headers=self.headers)
                        contents = etree.HTML(content.text)
                        # paper title
                        title = contents.xpath("//h1[@class='xx_title']/text()")[0]
                        items['titleCh'] = title
                        items['titleEn'] = ''
                        print('title:', title)
                        # source
                        source = sources[index]
                        items['source'] = source
                        print('source:', source)
                        # keywords
                        each_key_words = key_wordlist[index]
                        print('keywords:', each_key_words)
                        items['keywordsEn'] = ''
                        items['keywordsCh'] = each_key_words
                        # authors
                        author = contents.xpath("//*[@id='content']/div[2]/div[3]/a/text()")
                        items['authorCh'] = author
                        items['authorEn'] = ''
                        print('authors:', author)
                        # affiliation
                        unit = contents.xpath("//*[@id='content']/div[2]/div[5]/a[1]/text()")
                        units = ''.join(unit).strip(';')
                        items['unitCh'] = units
                        items['unitEn'] = ''
                        print('affiliation:', units)
                        # classification number
                        classify = contents.xpath("//*[@id='content']/div[2]/div[5]/text()")[-1]
                        c = ''.join(classify).split(';')
                        res = []
                        for name in c:
                            print('current classification number:', name)
                            try:
                                if name.find("TP391.41") != -1:
                                    print('changing classification number!')
                                    name = 'TP391.4'
                                result = requests.get('http://127.0.0.1:5000/%s/' % name)
                                time.sleep(5)
                                re_classify1 = result.content
                                string = str(re_classify1, 'utf-8')
                                classify_result = eval(string)['classfiy']
                                # print('classification navigation:', classify_result)
                            except Exception as e:
                                print(e)
                            res.append(classify_result)
                        print('classification navigation:', res)
                        items['classify'] = res
                        # abstract
                        abstract = contents.xpath("//div[@class='xx_font'][1]/text()")[1].strip()
                        print('abstract:', abstract)
                        items['abstractCh'] = abstract
                        items['abstractEn'] = ''
                        # similar articles ('期' means "issue" and delimits entries)
                        similar = contents.xpath(
                            "//*[@id='xiangsi']/table[2]/tbody/tr[3]/td/table/tbody/tr/td/text()")
                        si = ''.join(similar).replace('\r\n', '').split('期')
                        po = []
                        for i in si:
                            sis = i + '期'
                            if len(sis) > 3:
                                po.append(sis)
                        items['similar_article'] = po
                        # references
                        refer_doc = contents.xpath("//*[@id='cankao']/table[2]/tbody/tr[3]/td/table/tbody/tr/td/text()")
                        items['refer_doc'] = refer_doc
                        li.append(items)
                    except Exception as e:
                        print(e)
                print(len(li))
            except Exception as e:
                print(e)
    return li
def download(url, headers={}):
    req = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(req)
    content = response.read().decode('utf-8')
    response.close()
    return content
noDESCnoMAPwriter.writeheader()
with open("websitet14.csv", newline="") as csvfile:
    # variables added to a list
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        info = False
        description = False
        maPrice = False
        # row[2] contains the internal part number for website
        request = urllib.request.Request(
            'http://www.website.com/search.php?search=' + row[2] + '&type=Part+%23')
        response = opener.open(request)
        pagedata = response.read()
        soup = BeautifulSoup(pagedata.decode('utf-8', 'ignore'))
        # Cost, Retail, Jobber, and Map area on the websitet14.csv
        # row[4], row[5], row[6], and row[7] respectively
        productrow.row["Vendor"] = row[0]
        productrow.row["Variant SKU"] = row[2]
        if row[5] != "\\N":
            # the retail price that's marked out on the webpage
            productrow.row["Variant Compare At Price"] = row[5]
        productrow.row["Variant Price"] = row[7]
        if productrow.row["Variant Price"] != "\\N":
            maPrice = True
def get_page(self, page_index):
    url = self.SiteUrl + '?page=' + str(page_index)
    request = urllib.request.Request(url)
    response = urllib.request.urlopen(request)
    return response.read().decode('gbk')
import urllib.request
import urllib.parse

URL = 'https://baike.baidu.com/item/'
new_url = "相声有新人/22779051"
new_url = urllib.parse.quote(new_url)
date = {"fr": "aladdin"}
date = urllib.parse.urlencode(date)
URL = URL + new_url + "?" + date
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36"
}
request = urllib.request.Request(URL, headers=headers)
response = urllib.request.urlopen(request)
html = response.read().decode()
print(html)
import urllib.request

url = 'http://www.baidu.com/'
# fetch the response
response = urllib.request.urlopen(url=url)
content = response.read().decode('utf-8')
print(content)
def save_url_to_file(url, filename):
    with urllib.request.urlopen(url) as response:
        with open(filename, 'wb') as file:
            if file.write(response.read()):
                return 0
    return 1
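# A minimal usage sketch for save_url_to_file() above (the URL and file name
# are placeholders, not from the original snippet); it assumes urllib.request
# is imported at module level. The function returns 0 when at least one byte
# was written and 1 for an empty response.
status = save_url_to_file("https://example.com/", "example.html")
print("saved" if status == 0 else "empty response")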
# initialization
Nasdaq = ''
Hetf = ''
Sp = ''
NasdaqSig = 0
SpSig = 0
HetfSig = 0
urls = ['http://www.qqjjsj.com/gpsc/index.html',
        'http://www.qqjjsj.com/gpsc/index_2.html',
        'http://www.qqjjsj.com/gpsc/index_3.html',
        'http://www.qqjjsj.com/gpsc/index_4.html']
# open each page
for url in urls:
    request = urllib.request.Request(url)
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    ## NASDAQ (the pattern matches Chinese text like "...年...月...日纳斯达克...平均市盈率为...")
    if NasdaqSig == 0:
        NasdaqPattern = re.compile(r'\d+年\d+月\d+日纳斯达克.*?平均市盈率为?\d+.\d+')
        NasdaqItems = re.findall(NasdaqPattern, content)
        if NasdaqItems:
            Nasdaq = NasdaqItems[0] + '\n'
            NasdaqSig = 1
            with open('Nasdaq.txt', 'a') as f:
                f.write(Nasdaq)
    ## S&P 500
    if SpSig == 0:
        SpPattern = re.compile(r'\d+年\d+月\d+日标准普尔.*?平均市盈率为?\d+.\d+')
__author__ = 'wangxiaodong'
import urllib.request

# 1
response = urllib.request.urlopen('http://www.baidu.com')
html = response.read()

# 2
req = urllib.request.Request('http://www.baidu.com')
response = urllib.request.urlopen(req)
the_page = response.read()

# 3 send POST data
import urllib.parse

url = 'http://localhost/login.php'
user_agent = 'Mozilla/4.0(compatible;MSIE 5.5;Windows NT'
values = {
    'act': 'login',
    'login[email]': '*****@*****.**',
    'login[password]': '123456'
}
# urlencode() returns str; encode to bytes so the Request sends a POST body.
data = urllib.parse.urlencode(values).encode('utf-8')
req = urllib.request.Request(url, data)
req.add_header('Referer', 'http://www.python.org')
response = urllib.request.urlopen(req)
the_page = response.read()
print(the_page.decode("utf8"))
# Add browser-style headers to the Request object so the program sends the
# POST request disguised as a browser.
# import the parse module
import urllib.parse
import urllib.request

url = "http://httpbin.org/post"
# set browser info
headers = {
    "User-Agent": "Mozilla/5.0(Macintosh;Intel Mac OS X 10_13_6) AppleWebKit/537.36(KHTML,like Gecko)Chrome/69.0.3497.100 Safari/537.36"
}
data_dict = {"word": "hello world"}
# convert the dict into a bytes stream
data = bytes(urllib.parse.urlencode(data_dict), encoding='utf8')
# create the Request object
request_obj = urllib.request.Request(url=url, data=data, headers=headers, method="POST")
response = urllib.request.urlopen(request_obj)
print(response.read().decode("utf8"))
import urllib.request
import urllib.parse

host = 'http://saweather.market.alicloudapi.com'
path = '/area-to-id'
method = 'GET'
appcode = '7793711114fc4bbfb424f818eef8e7e2'
# Percent-encode the non-ASCII query value ('青岛' = Qingdao); raw CJK
# characters are not valid in a URL.
querys = 'area=' + urllib.parse.quote('青岛')
bodys = {}
url = host + path + '?' + querys
# Build a Request first so the header can be attached, then open it; the
# original called urlopen() on the URL and tried to add the header to the
# response, then called the nonexistent urllib.response.urlopen().
request = urllib.request.Request(url)
request.add_header('Authorization', 'APPCODE ' + appcode)
response = urllib.request.urlopen(request)
content = response.read()
if content:
    print(content)
import urllib.request
import http.cookiejar
from bs4 import BeautifulSoup

url = 'https://www.cnblogs.com/zdlfb/p/6130724.html'
print('method 1')
response = urllib.request.urlopen(url)
print(response.getcode())
body = response.read()
print(len(body))
# A response body can only be read once; the original called read() twice,
# so its second print showed empty bytes.
print(body)
# print('method 2')
# request = urllib.request.Request(url)
# request.add_header('user-agent', 'mozilla/5.0')  # disguise the crawler as a browser
# response1 = urllib.request.urlopen(request)
# print(response1.getcode())
# print(len(response1.read()))
# print('method 3')
# cj = http.cookiejar.CookieJar()
# opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# urllib.request.install_opener(opener)
# response3 = urllib.request.urlopen(url)
# print(response3.getcode())
# print(response3.read())
soup = BeautifulSoup()  # html document string, html parser, html document encoding
def gethtml(url):
    response = urllib.request.urlopen(url)
    return response.read()
... urllib.robotparser is used for handling robots.txt files
'''
import urllib.request, urllib.parse, urllib.error, urllib.robotparser
import csv
import json
import codecs

if __name__ == "__main__":
    print("Example: crawling Douban data with urllib")
    print("Search keyword: Python")
    url = "https://api.douban.com/v2/book/search?q=python"
    response = urllib.request.urlopen(url)
    # decode the bytes stream into a string
    ebook_str = response.read().decode()
    # parse the JSON string into a dict; json.loads is safer than the
    # original eval() and handles true/false/null correctly
    ebook_dict = json.loads(ebook_str)
    # print(ebook_dict)
    # print(type(ebook_dict))
    count = ebook_dict["count"]
    total = ebook_dict["total"]
    with codecs.open('books.csv', 'w', 'utf-8') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        # header row: title, author, description, publisher, price
        spamwriter.writerow(["书名", "作者", "描述", "出版社", "价格"])
        # write each book's info
        for book in ebook_dict["books"]:
        'speak',
    'params': [
        '1.1',
        {
            'language': 'ja',
            'text': n,
            'voiceType': "*",
            'audioType': "audio/x-wav"
        }
    ]
}
obj_command = json.dumps(tts_command)  # dict to JSON string
obj_command = obj_command.encode('utf-8')
req = urllib.request.Request(tts_url, obj_command)
response = urllib.request.urlopen(req)
received = response.read().decode('utf-8')  # convert bytes to str with decode()

# extract wav file
obj_received = json.loads(received)
tmp = obj_received['result']['audio']  # extract result->audio
# base64.decodestring was deprecated and removed in Python 3.9;
# decodebytes is the equivalent replacement.
speech = base64.decodebytes(tmp.encode('utf-8'))

# write the synthesized speech out as a .wav file
f = open("out.wav", 'wb')
f.write(speech)
f.close()  # the original ``f.close`` was missing the call parentheses

# play back the synthesized speech audio
input_filename = 'out.wav'
buffer_size = 4096
wav_file = wave.open(input_filename, 'rb')
p = pyaudio.PyAudio()
def crawl(self, link):
    tryOnce = 0
    robotParser = self.setupRobotParser(link)
    if robotParser.can_fetch("*", link):
        while True:
            try:
                response = urllib.request.urlopen(link)
                break
            except urllib.error.HTTPError as e:
                if e.code == 429:
                    if tryOnce == 1:
                        print('Thread ' + str(self.crawlerID) + ': Too many requests: ' + link + ' returning.')
                        return
                    print('Thread ' + str(self.crawlerID) + ': Too many requests: ' + link + ' trying again in 120 seconds.')
                    sleep(120)
                    tryOnce = 1
                else:
                    return
            # for handling any other url errors
            except:
                print('Error opening link: ', link, " by thread : ", self.crawlerID)
                return

        returnedLink = response.geturl()
        if returnedLink != link:
            print('Thread ' + str(self.crawlerID) + ': Redirection:' + link + ' to ' + returnedLink + ' returning.')
            return

        urlInfo = response.info()
        dataType = urlInfo.get_content_type()
        if 'html' not in dataType:
            print('Thread ' + str(self.crawlerID) + ': Not HTML ' + link + ' returning.')
            return

        try:
            webContent = response.read().decode(response.headers.get_content_charset('utf-8'))
        except:
            print("Incomplete Read of web content due to a defective http server.")
            webContent = None

        if webContent:
            Crawler.webpagesLock.acquire()
            if Crawler.webpagesSaved < NUMOFPAGES:
                Crawler.webpagesSaved += 1
            else:
                print('Thread ' + str(self.crawlerID) + ': Page number limit reached ')
                Crawler.webpagesLock.release()
                return
            Crawler.webpagesLock.release()

            selector = None
            while True:
                try:
                    selector = WebPages.select().where(WebPages.pageURL == returnedLink).exists()
                    break
                except (OperationalError, sqlite3.OperationalError) as e:
                    if 'binding' in str(e):
                        break
                    print('Thread ', self.crawlerID, ': Database busy, retrying. WebPagesTable')
                except:
                    break

            if selector:
                print('Thread ' + str(self.crawlerID) + ': Updating webpage ' + link)
                while True:
                    try:
                        WebPages.update(pageContent=webContent).where(
                            WebPages.pageURL == returnedLink).execute()
                        break
                    except (OperationalError, sqlite3.OperationalError) as e:
                        if 'binding' in str(e):
                            break
                        print('Thread ', self.crawlerID, ': Database busy, retrying. WebPagesTable')
                    except:
                        break
            else:
                print('Thread ' + str(self.crawlerID) + ': Saving webpage ' + link)
                try:
                    inserted = False
                    while True:
                        try:
                            if not inserted:
                                WebPages(pageURL=returnedLink, pageContent=webContent).save()
                                inserted = True
                            ...
                            PageRank.create(pageURL=returnedLink).update()
                            ...
                            break
                        except (OperationalError, sqlite3.OperationalError) as e:
                            if 'binding' in str(e):
                                break
                            print('Thread ', self.crawlerID, ': Database busy, retrying. WebPagesTable & PageRank')
                            sleep(randint(1, 5))
                        except:
                            break
                # should never happen
                except:
                    print('UnexpectedException: In saving webpage WEEEEEEEEEEEEEEEEEEEEEEE')

            print('Thread ' + str(self.crawlerID) + ': Done saving webpage and starting link extraction ' + link)
            try:
                parser = MyHTMLParser(link)
                parser.feed(str(webContent))
            # should never happen
            except:
                print('UnexpectedException: in parser WEEEEEEEEEEEEEEEEEEEEEEE')

            size = 999
            while True:
                try:
                    for i in range(0, len(parser.links), size):
                        UncrawledTable.insert_many(parser.links[i:i + size]).upsert().execute()
                    break
                except (OperationalError, sqlite3.OperationalError) as e:
                    if 'binding' in str(e):
                        break
                    print('Thread ', self.crawlerID, ': Database busy, retrying. UnCrawledTable')
                except:
                    break

            while True:
                try:
                    print("UNCRAWLED URLS = ", UncrawledTable.select().count(),
                          ' Thread ' + str(self.crawlerID))
                    break
                except (OperationalError, sqlite3.OperationalError) as e:
                    if 'binding' in str(e):
                        break
                    print('Thread ', self.crawlerID, ': Database busy, retrying. print UnCrawledTable')
                except:
                    break

            print('Thread ' + str(self.crawlerID) + ': Done inserting links ' + link)
def getAnswer(self, answerId):
    host = "http://www.zhihu.com"
    url = host + answerId
    print(url)
    userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) Chrome/53.0.2785.116 Safari/537.36"
    # build a Header dict to disguise the request as a browser
    Header = {"User-Agent": userAgent}
    # request the URL
    req = urllib.request.Request(url, headers=Header)
    # get the response content
    try:
        response = urllib.request.urlopen(req, timeout=20)
        content = response.read()
        if content is None:
            print("Empty")
            return False
    except:
        print("timeOut,please try again")
        time.sleep(30)
        # try to switch proxy ip
        response = urllib.request.urlopen(req, timeout=20)
        content = response.read()
        if content is None:
            print("Empty")
            return False
    # The full page source is now available; BeautifulSoup is more convenient
    # than regular expressions here.
    try:
        bs = BeautifulSoup(content, "lxml")
    except:
        print("BeautifulSoups Error")
        return False
    # get the question title
    title = bs.title
    if title is None:
        print("title is Empty")
        return False
    if title.string is None:
        print("string is Empty")
        return False
    fileName_old = title.string.strip()
    # The title becomes the output file name; strip characters that are
    # illegal in file names with a regular expression.
    fileName = re.sub(r'[\/:*?"<>|]', '-', fileName_old)
    self.save2file(fileName, title.string)
    # ??????? why is the content here title.string rather than bs ????????????
    # get the question's supplementary description
    detail = bs.find("div", class_="zm-editable-content")  # class_ is BeautifulSoup syntax
    self.save2file(fileName, "\n\n\n\n--------------------Link %s ----------------------\n\n" % url)
    self.save2file(fileName, "\n\n\n\n--------------------Detail----------------------\n\n")
    if detail is not None:
        for i in detail.strings:
            self.save2file(fileName, i)
    # get the answers to the question
    answers = bs.find_all("div", class_="zm-editable-content clearfix")
    # counters
    k = 0
    index = 0
    for each_answer in answers:
        self.save2file(fileName, "\n\n-------------------------answer %s via -------------------------\n\n" % k)
        # write out the content of each answer
        for a in each_answer.strings:
            self.save2file(fileName, a)
        k += 1
        index += 1
    ##################################################################
    # initialize the mail parameters
    smtp_server = 'smtp.126.com'
    from_mail = '*****@*****.**'
    password = '******'
    to_mail = '465731912@kindle', '*****@*****.**'
    # call the mail-sending helper
    send_kindle = MailAbout(smtp_server, from_mail, password, to_mail)
    send_kindle.mail_text(fileName)
    print(fileName)
def process_custom_monitor_list(self):
    """Get and process custom monitor list from monitor URL."""
    monitor_url = ("http://%s/ida30/svr/cust/CustMonitorAction.do?"
                   "method=queryPendingList&getHidFrame=Y&sysModuleId=A90AA1526CEC6022D2C7D2ABE9590308&"
                   "eachpagerows=1000" % self.host)
    try:
        response = urllib.request.urlopen(monitor_url)
        content = str(response.read())
        start = content.find('<table id="powergrid"')
        if start > 0:
            end = content.find('</table>', start)
            content = content[start:(end + 8)]
            content = content.replace("<br>", "")
            content = content.replace("<font color='red'>", "")
            content = content.replace("<font color='red' >", "")
            content = content.replace("</font>", "")
            # '待回单' means "awaiting feedback"; mark those rows with "1".
            content = content.replace('<img src="/ida30/images/svr/revert.gif" title="待回单">', "1")
            # parse the monitor list.
            parser = FbHTMLParser()
            parser.feed(content)
            parser.close()
            mbs = parser.get_result()
            if len(mbs) > 0:
                mbs.remove(mbs[0])
            # parse the recordSn list.
            parser = AcceptHTMLParser()
            parser.feed(content)
            parser.close()
            record_sns = parser.get_result()
            i = 0
            for mb in mbs:
                t = datetime(*time.strptime(mb[5], "%Y-%m-%d %H:%M:%S")[:6])
                t1 = time.mktime(t.timetuple())
                t2 = time.mktime(datetime.now().timetuple())
                if mb[9] != "1":
                    if t2 - t1 < 120:
                        self.write_log(("New custom message, action to receive.", mb[1], self.uid),
                                       "%s [%s] (%s)")
                    elif t2 - t1 > 600:
                        self.accept_monitor_bill(record_sns[i], mb[1])
                        self.write_log(("Accept custom message automatically.", mb[1], self.uid),
                                       "%s [%s] (%s)")
                    elif t2 - t1 > 480:
                        self.write_log(("Accept custom message, overtime.", mb[1], self.uid),
                                       "%s [%s] (%s)")
                i += 1
            print("[%s] I'm here, Good luck to you! (%s)"
                  % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), self.uid))
            return self.interval
        else:
            self.write_log(("Error: Get CustomMonitorList fail.", self.uid), "%s (%s)")
            return 0
    except (URLError, HTTPError):
        self.write_log(("Error: Access CustomMonitorList URL fail.", self.uid), "%s (%s)")
        return 0
if lights_flag:
    # connect to the bridge
    b = Bridge(bridge_ip)
    b.connect()
    # create a light grouping and turn them on
    lr_lamp = [1]
    command = {'on': True, 'bri': 127}
    b.set_light(lr_lamp, command)
    # print(b.get_api())

for n in range(500):
    feed = gtfs_realtime_pb2.FeedMessage()
    response = urllib.request.urlopen(
        'https://gtfs.translink.ca/v2/gtfsposition?apikey=' + API_KEY)
    feed.ParseFromString(response.read())
    green_dist = []
    red_dist = []
    for entity in feed.entity:
        if (entity.HasField('vehicle')
                and (entity.vehicle.trip.route_id == "16718")
                and (entity.vehicle.trip.direction_id == westbound)):
            # print(entity)
            lat_1 = entity.vehicle.position.latitude
            lon_1 = entity.vehicle.position.longitude
            busID = entity.vehicle.vehicle.id
            now = datetime.now()
            bus_checkin_time = datetime.fromtimestamp(
                int(entity.vehicle.timestamp))
def get(url: str) -> bytes:
    with urllib.request.urlopen(url) as response:
        return response.read()
def download_file(download_url, i):
    response = request.urlopen(download_url)
    # str() guards against a numeric index being passed for ``i``; the
    # original concatenation would raise TypeError for an int.
    file = open("./pdf/document" + str(i) + ".pdf", 'wb')
    file.write(response.read())
    file.close()
    print("Completed")
# -*- coding:utf-8 -*-
import urllib.request


class RedirectHandler(urllib.request.HTTPRedirectHandler):
    def http_error_301(self, req, fp, code, msg, headers):
        pass

    def http_error_302(self, req, fp, code, msg, headers):
        # Follow the redirect via the base class, then annotate the result
        # with the original status and the final URL.
        result = urllib.request.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        result.status = code
        result.newurl = result.geturl()
        return result


opener = urllib.request.build_opener(RedirectHandler)
url = "http://www.baidu.com"
response = opener.open(url)
data = response.read().decode()
print(data)
print(response.geturl())
# Create the url to access the api
url = '{0}://{1}{2}'.format(options.protocol, hostname, api_url)
# Create the request
request = urllib.request.Request(url)
# Basic authentication...
credential = base64.b64encode(bytes('%s:%s' % (options.username, options.password), 'ascii'))
request.add_header("Authorization", "Basic %s" % credential.decode('utf-8'))
request.add_header('OCS-APIRequest', 'true')
try:
    with urllib.request.urlopen(request) as response:
        content = response.read()
except urllib.error.HTTPError as error:
    # User is not authorized (401)
    print('UNKNOWN - [WEBREQUEST] {0} {1}'.format(error.code, error.reason))
    sys.exit(3)
except urllib.error.URLError as error:
    # Connection has timed out (wrong url / server down)
    print('UNKNOWN - [WEBREQUEST] {0}'.format(str(error.reason).split(']')[0].strip()))
    sys.exit(3)
try:
    # Convert the webrequest response to xml
    xml_root = xml.etree.ElementTree.fromstring(content)
except xml.etree.ElementTree.ParseError:
    print('UNKNOWN - [XML] Content contains no or wrong xml data... check the url and if the api is reachable!')
    sys.exit(3)
def httpreg(softname, version, phydriverserial, regkey):
    signkey = '&key=0z#z#b#094kls#040jkas892#z#z#b#0'
    data = {}
    data["softname"] = softname
    data["version"] = version
    data["phydriverserial"] = phydriverserial
    data["regkey"] = regkey
    keys = sorted(data)
    src = ""
    for key in keys:
        if len(src):
            src = src + "&"
        src = src + key
        src = src + "="
        src = src + data[key]
    # Avoid shadowing the ``str`` builtin as the original did.
    sign_src = (src + signkey).encode("utf8")
    # phydriverserial=123&regkey=456&softname=cqsc&version=1.0.0.0&key=0z#z#b#094kls#040jkas892#z#z#b#0
    # (the "&reg" in the example above had been mangled into the "®" entity)
    m.update(sign_src)
    result = m.hexdigest()
    data = src.encode("utf8")
    url = "http://caiptong.com/share/share_registdeviceid?sign=%s" % (result)
    request = urllib.request.Request(url=url, data=data, headers=headers, method='POST')
    try:
        # response = urllib.request.urlopen(request)
        response = opener.open(request, timeout=5)
        html = response.read().decode()
    except urllib.error.HTTPError as e:
        # print('The server couldn\'t fulfill the request.')
        # print('Error code: ' + str(e.code))
        # print('Error reason: ' + e.reason)
        print("Error", "network connection error!")
        return False
    except urllib.error.URLError as e:
        # print('We failed to reach a server.')
        # print('Reason: ' + e.reason)
        print("Error", "network connection error!")
        return False
    except Exception as msg:
        print("Exception:%s" % msg)
        return False
    except:
        # print("error lineno:" + str(sys._getframe().f_lineno))
        print("Error", "network connection error!")
        return False
    html = html.strip()
    # print(html)
    json_data = json.loads(html)
    # {"msg":"登录成功.","success":true,"datas":{"ckregkey":false,"topics":[],"userid":175}}
    if json_data["success"] != True:
        print("Error", "account not registered!")
        if "datas" in json_data:
            print("Error", json_data["datas"]["notice"])
        return False
    else:
        if "datas" in json_data:
            datas = json_data["datas"]
            if "ckregkey" in datas:
                if datas["ckregkey"] == True:
                    return True
        return False
with open ("mexica.txt", "r") as ff: story = ff.read() print(story) list_of_words = nltk.word_tokenize(story) tagged_words = nltk.pos_tag(list_of_words) list_adj = [x for x, i in tagged_words if i == "JJ"] metaphor_dict = {} for i in list_adj: url = "http://bonnat.ucd.ie/jigsaw/index.jsp?q=" + i with request.urlopen(url) as response: page_source = response.read() x = {i: [y for y in re.findall('longvehicle=(.*?)">', str(page_source))]} metaphor_dict.update(x) connectors = ['like', 'as'] list_of_words_2 = nltk.word_tokenize(story) for i, j in enumerate(list_of_words): if j in list_adj: if len(metaphor_dict.get(j)) == 0: continue else: y = str(random.choice(connectors) + " " + random.choice(metaphor_dict[str(j)])) for z, k in enumerate(list_of_words_2): if k == j: list_of_words_2.insert(z+1, y) break
# (The class definitions these methods belong to — DatasetAuthProxy and
# BlueSchemeHandler — are elided above; blue_open implements the
# <protocol>_open hook for the custom "blue" scheme.)
def __init__(self):
    self.token = 'unknown'

def blue_open(self, req):  # <protocol>_open
    print('Blue BlueSchemeHandler')
    url = req.get_full_url()
    scheme, data = url.split(':', 1)
    headers = {}
    newURL = 'https:' + data
    # newURL = urllib.parse.unquote_to_bytes(newURL)
    newReq = urllib.request.Request(newURL)
    fp = urllib.request.urlopen(newReq)
    return urllib.response.addinfourl(fp, headers, url)

myAuthProxy = DatasetAuthProxy()
blueSchemeHandler = BlueSchemeHandler()
opener = urllib.request.build_opener(myAuthProxy, blueSchemeHandler)
urllib.request.install_opener(opener)

# Client side test - with the "blue" protocol / scheme
# A custom-scheme URL must go through the installed opener; the original
# passed it to the builtin open(), which only handles local files, and then
# read from an undefined ``response`` variable.
response = urllib.request.urlopen('blue://www.lefigaro.fr')
print('Response:\n')
print(response.read())
def getStockMetricsData(stockDataUrl):
    # stockMetricsData = {}
    stockValueMetricsData = {}
    stockPrice = 0
    filteredStockMetricsData = ''
    try:
        req = urllib.request.Request(
            stockDataUrl,
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
            }
        )
        response = urllib.request.urlopen(req, timeout=10)
    except ssl.SSLError as err:
        print('SSLError: Socket Connection timed out with error: ', err)
        return ''
    except urllib.error.HTTPError as e:
        print('HTTPError: The server couldn\'t fulfill the request. Error code: ', e.code)
        return ''
    except urllib.error.URLError as e:
        print('URLError: We failed to reach a server. Reason: ', e.reason)
        return ''
    except:
        print('An error occurred while accessing: ', stockDataUrl)
        return ''

    # html = requests.get(stockDataUrl, timeout=5).read()
    # html = urllib.request.urlopen(stockDataUrl).read()
    html = response.read()
    soup = BeautifulSoup(html, features="lxml")
    data = soup.findAll(text=True)
    result = filter(visible, data)
    items_list = list(result)
    print(items_list)

    # Order of keys: DoubleSpacedKeys1 SingleSpacedKeys2 TripleSpacedPriceKeys3
    #                DoubleSpacedKeys4 NoSpacedKeys5 DoubleSpacedKeys6
    # print(DoubleSpacedKeys1)
    # DoubleSpacedKeys1 = [' Market Cap $: ', ' Enterprise Value $: ', ' Volume: ', ' Avg Vol (1m): ']
    for keyFeature in DoubleSpacedKeys1:
        keyData = find_element_in_list(keyFeature, items_list, 2)
        # print(keyFeature.replace(',', '').replace(' ', '').replace('\n', ''), keyData)
        # Add any filtering conditions here
        if keyData != 0:
            if keyFeature == DoubleSpacedKeys1[0] and keyData < MIN_MARKET_CAP_MIL:
                return ''
            if keyFeature == DoubleSpacedKeys1[2] and keyData < MIN_TRADE_VOL:
                return ''
            if keyFeature == DoubleSpacedKeys1[3] and keyData < MIN_AVG_TRADE_VOL:
                return ''
        filteredStockMetricsData += str(keyData) + ','
        # stockMetricsData[keyFeature.replace(',', '').replace(':', '').replace(' ', '').replace('\n', '')] = keyData

    # SingleSpaced fields: '\nPrice: ', ' $3.76\n',
    # print(SingleSpacedKeys2)
    for keyFeature in SingleSpacedKeys2:
        keyData = find_element_in_list(keyFeature, items_list, 1)
        # Extra field for PriceChange to be added manually right before Price field
        if keyFeature == '\nPrice: ':
            # '\nWipro Ltd\n$\n3.76\n', ' -0.04 (-1.05%)\n', ' ', ' ', '\n', ' ', ' Volume: ',
            extraFeature = ' Volume: '
            if keyData == 0:
                keyData = find_element_in_list(extraFeature, items_list, -6)
            stockPrice = keyData
            priceChangePercent = find_element_in_list(extraFeature, items_list, -5)
            # print('1DayPriceChange %', priceChangePercent)
            filteredStockMetricsData += str(priceChangePercent) + ','
            # stockMetricsData['1DayPriceChangePercent'] = priceChangePercent
        # print(keyFeature.replace(',', '').replace(' ', '').replace('\n', ''), keyData)
        # Add any filtering conditions here
        if (keyData != 0) and (keyFeature == SingleSpacedKeys2[0]) and (keyData < MIN_STOCK_PRICE):
            return ''
        filteredStockMetricsData += str(keyData) + ','
        # stockMetricsData[keyFeature.replace(',', '').replace(':', '').replace(' ', '').replace('\n', '')] = keyData

    # print(TripleSpacedPriceKeys3)
    # ['\nEarnings Power Value\n', '\nNet Current Asset Value\n', '\nTangible Book\n', '\nMedian P/S Value\n',
    #  '\nGraham Number\n', '\nPeter Lynch Value\n', '\nDCF (Earnings Based)\n', '\nDCF (FCF Based)\n', '\nProjected FCF\n']
    for keyFeature in TripleSpacedPriceKeys3:
        keyData = find_element_in_list(keyFeature, items_list, 3)
        stockValueMetricsData[keyFeature] = keyData
        # print(keyFeature.replace(',', '').replace(' ', '').replace('\n', ''), keyData)
        # Add any filtering conditions here
        # filteredStockMetricsData += str(keyData) + ','
        # stockMetricsData[keyFeature.replace(',', '').replace(':', '').replace(' ', '').replace('\n', '')] = keyData

    # print(DoubleSpacedKeys4)
    # ['\nPB Ratio\n', '\nPrice-to-Tangible-Book\n', '\nPS Ratio\n', '\nPrice-to-Median-PS-Value\n', '\nPrice-to-Graham-Number\n', '\nPrice-to-Peter-Lynch-Fair-Value\n',
    #  6: '\nPrice-to-Intrinsic-Value-DCF (Earnings Based)\n', '\nPrice-to-Intrinsic-Value-DCF (FCF Based)\n', '\nPrice-to-Intrinsic-Value-Projected-FCF\n', '\nPrice-to-Earnings-Power-Value\n',
    #  10: '\nPEG Ratio\n', '\nCurrent Ratio\n', '\nQuick Ratio\n', '\nCash-To-Debt\n', '\nEquity-to-Asset\n',
    #  15: '\nDebt-to-Equity\n', '\nDebt-to-EBITDA\n', '\nOperating Margin %\n', '\nNet Margin %\n', '\nROE %\n', '\nROA %\n']
    for keyFeature in DoubleSpacedKeys4:
        keyData = find_element_in_list(keyFeature, items_list, 2)
        # print(keyFeature.replace(',', '').replace(' ', '').replace('\n', ''), keyData)
        # Use previously extracted data as a fallback, when one of the metrics is not available
        if keyData == 0:
            if (keyFeature == DoubleSpacedKeys4[1]) and stockValueMetricsData[TripleSpacedPriceKeys3[2]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[2]]
            elif (keyFeature == DoubleSpacedKeys4[3]) and stockValueMetricsData[TripleSpacedPriceKeys3[3]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[3]]
            elif (keyFeature == DoubleSpacedKeys4[4]) and stockValueMetricsData[TripleSpacedPriceKeys3[4]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[4]]
            elif (keyFeature == DoubleSpacedKeys4[5]) and stockValueMetricsData[TripleSpacedPriceKeys3[5]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[5]]
            elif (keyFeature == DoubleSpacedKeys4[6]) and stockValueMetricsData[TripleSpacedPriceKeys3[6]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[6]]
            elif (keyFeature == DoubleSpacedKeys4[7]) and stockValueMetricsData[TripleSpacedPriceKeys3[7]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[7]]
            elif (keyFeature == DoubleSpacedKeys4[8]) and stockValueMetricsData[TripleSpacedPriceKeys3[8]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[8]]
            elif (keyFeature == DoubleSpacedKeys4[9]) and stockValueMetricsData[TripleSpacedPriceKeys3[0]] != 0:
                keyData = stockPrice / stockValueMetricsData[TripleSpacedPriceKeys3[0]]
        # Add any filtering conditions here
        if keyData != 0:
            if (keyFeature == DoubleSpacedKeys4[0]) and (keyData < MIN_PB_RATIO or keyData > MAX_PB_RATIO):
                return ''
            # Not reliable, so disabling
            if (keyFeature == DoubleSpacedKeys4[1]) and (keyData < MIN_PRICE_TO_TANGIBLE_BOOK_RATIO or keyData > MAX_PRICE_TO_TANGIBLE_BOOK_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys4[2]) and (keyData < MIN_PS_RATIO or keyData > MAX_PS_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys4[3]) and (keyData < MIN_PRICE_TO_MEDIAN_PS_VALUE or keyData > MAX_PRICE_TO_MEDIAN_PS_VALUE):
                return ''
            if (keyFeature == DoubleSpacedKeys4[4]) and (keyData < MIN_PRICE_TO_GRAHAM_NUMBER or keyData > MAX_PRICE_TO_GRAHAM_NUMBER):
                return ''
            if (keyFeature == DoubleSpacedKeys4[5]) and (keyData < MIN_PRICE_TO_PETER_LYNCH_VALUE or keyData > MAX_PRICE_TO_PETER_LYNCH_VALUE):
                return ''
            if (keyFeature == DoubleSpacedKeys4[6]) and (keyData < MIN_PRICE_TO_INTRINSIC_VALUE_EARNINGS_DCF or keyData > MAX_PRICE_TO_INTRINSIC_VALUE_EARNINGS_DCF):
                return ''
            if (keyFeature == DoubleSpacedKeys4[7]) and (keyData < MIN_PRICE_TO_INTRINSIC_VALUE_FCF_DCF or keyData > MAX_PRICE_TO_INTRINSIC_VALUE_FCF_DCF):
                return ''
            if (keyFeature == DoubleSpacedKeys4[8]) and (keyData < MIN_PRICE_TO_PROJECTED_FCF_VALUE or keyData > MAX_PRICE_TO_PROJECTED_FCF_VALUE):
                return ''
            if (keyFeature == DoubleSpacedKeys4[9]) and (keyData < MIN_PRICE_TO_EARNINGS_POWER_VALUE or keyData > MAX_PRICE_TO_EARNINGS_POWER_VALUE):
                return ''
            if (keyFeature == DoubleSpacedKeys4[10]) and (keyData > MAX_PEG_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys4[11]) and (keyData < MIN_CURRENT_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys4[12]) and (keyData < MIN_QUICK_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys4[13]) and (keyData < MIN_CASH_TO_DEBT):
                return ''
            if (keyFeature == DoubleSpacedKeys4[15]) and (keyData > MAX_DEBT_TO_EQUITY):
                return ''
            if (keyFeature == DoubleSpacedKeys4[16]) and (keyData > MAX_DEBT_TO_EBITDA):
                return ''
            if (keyFeature == DoubleSpacedKeys4[17]) and (keyData < MIN_OPERATING_MARGIN_PERCENT):
                return ''
            if (keyFeature == DoubleSpacedKeys4[18]) and (keyData < MIN_NET_MARGIN_PERCENT):
                return ''
            if (keyFeature == DoubleSpacedKeys4[19]) and (keyData < MIN_ROE_PERCENT):
                return ''
            if (keyFeature == DoubleSpacedKeys4[20]) and (keyData < MIN_ROA_PERCENT):
                return ''
        filteredStockMetricsData += str(keyData) + ','
        # stockMetricsData[keyFeature.replace(',', '').replace(':', '').replace(' ', '').replace('\n', '')] = keyData

    # print(QudrapleSpacedPriceKeys5)
    # Example: '\nWACC vs ROIC\n', ' ', ' ', ' ', '\nROIC 28.08%\n', '\nWACC 2.85%\n',
    for keyFeature in QudrapleSpacedPriceKeys5:
        keyData = find_element_in_list(keyFeature, items_list, 4)
        # print('ROIC', keyData)
        # Add any filtering conditions here
        filteredStockMetricsData += str(keyData) + ','
        # stockMetricsData['ROIC'] = keyData
        keyData = find_element_in_list(keyFeature, items_list, 5)
        # print('WACC', keyData)
        # Add any filtering conditions here
        filteredStockMetricsData += str(keyData) + ','
        # stockMetricsData['WACC'] = keyData

    # print(DoubleSpacedKeys6)
    # 0: ['\nShiller PE Ratio\n', '\nPE Ratio\n', '\nForward PE Ratio\n', '\nPE Ratio without NRI\n', '\nEV-to-EBIT\n', '\nEV-to-EBITDA\n', '\nEV-to-Revenue\n',
    # 7: '\n3-Year Revenue Growth Rate\n', '\n3-Year EBITDA Growth Rate\n', '\n3-Year EPS without NRI Growth Rate\n',
    # 10: '\nPrice-to-Owner-Earnings\n', '\nPrice-to-Free-Cash-Flow\n', '\nPrice-to-Operating-Cash-Flow\n',
    # 13: '\nPiotroski F-Score\n', '\nAltman Z-Score\n', '\nBeneish M-Score\n', '\nFinancial Strength\n', '\nProfitability Rank\n', '\nValuation Rank\n']
    for keyFeature in DoubleSpacedKeys6:
        keyData = find_element_in_list(keyFeature, items_list, 2)
        # print(keyFeature.replace(',', '').replace(' ', '').replace('\n', ''), keyData)
        # Add any filtering conditions here
        if keyData != 0:
            if (keyFeature == DoubleSpacedKeys6[0]) and (keyData > MAX_SHILLER_PE_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys6[1]) and (keyData > MAX_PE_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys6[2]) and (keyData > MAX_FORWARD_PE_RATIO):
                return ''
            if (keyFeature == DoubleSpacedKeys6[4]) and (keyData < MIN_EV_to_EBIT or keyData > MAX_EV_to_EBIT):
                return ''
            if (keyFeature == DoubleSpacedKeys6[7]) and (keyData < MIN_3YR_REV_GROWTH_RATE):
                return ''
            if (keyFeature == DoubleSpacedKeys6[10]) and (keyData > MAX_PRICE_TO_OWNER_EARNINGS):
                return ''
            if (keyFeature == DoubleSpacedKeys6[11]) and (keyData > MAX_PRICE_TO_FREE_CASH_FLOW):
                return ''
            if (keyFeature == DoubleSpacedKeys6[12]) and (keyData > MAX_PRICE_TO_OPERATING_CASH_FLOW):
                return ''
        filteredStockMetricsData += str(keyData) + ','
        # stockMetricsData[keyFeature.replace(',', '').replace(':', '').replace(' ', '').replace('\n', '')] = keyData

    return filteredStockMetricsData
import urllib.request

url = 'http://www.yahoo.com'
request = urllib.request.Request(url)
response = urllib.request.urlopen(request)
the_page = response.read()
print(the_page.decode())

# shortened version...
the_page = urllib.request.urlopen('http://www.yahoo.com').read()

from urllib.parse import urlparse
result = urlparse('https://docs.python.org/3/library/index.html')
print(result)
def __init__(self, code):
    """
    Constructor
    :param code: code
    """
    self.__response = {}
    try:
        # exchange the OAuth code for an access_token
        response = urllib.request.urlopen(
            'https://graph.qq.com/oauth2.0/token?' +
            'grant_type=authorization_code' +
            '&client_id=' + qq_param['client_id'] +
            '&client_secret=' + qq_param['client_secret'] +
            '&code=' + code +
            '&redirect_uri=' + qq_param['callback']
        )
        # extract the access_token
        access_token = response.read().decode('utf-8').split('&')[0].split('=')[1]
        # use the access_token to get the user's openid
        response = urllib.request.urlopen(
            'https://graph.qq.com/oauth2.0/me?' +
            'access_token=' + access_token
        )
        # decode into a Python object (the API wraps its JSON in "callback( ... );")
        json_obj = json.loads(response.read().decode('utf-8')
                              .replace('callback( ', '').replace(' );', ''))
        open_id = json_obj['openid']
        # use the openid to get the user's info
        response = urllib.request.urlopen(
            'https://graph.qq.com/user/get_user_info?' +
            'access_token=' + access_token +
            '&oauth_consumer_key=' + qq_param['client_id'] +
            '&openid=' + open_id
        )
        # decode into a Python object
        user_info = json.loads(response.read().decode('utf-8'))
        # use the nickname in user_info to verify that login succeeded
        if user_info['nickname']:
            self.__response['success'] = True
            # check whether the user already exists in the database
            if KUser.objects.filter(user_type='qq', uid=open_id).exists():
                # if so, fetch the user directly
                k_user = KUser.objects.get(user_type='qq', uid=open_id)
            else:
                # otherwise, store the user in the database
                k_user = KUser(
                    user_type='qq',
                    uid=open_id,
                    nickname=user_info['nickname'],
                    avatar=user_info['figureurl_qq_1'],
                    is_admin=False
                )
                k_user.save()
            self.__response['user_info'] = {
                'user_type': k_user.user_type,
                'uid': k_user.uid,
                'nickname': k_user.nickname,
                'avatar': k_user.avatar,
                'is_admin': k_user.is_admin,
                'pk': k_user.pk
            }
        else:
            self.__response['success'] = False
    except Exception:
        self.__response['success'] = False
    return