def main(): sys.stdout.write(u'正在努力请求节目单...') sys.stdout.flush() data = common.open_url(list_url) if not data: return menu_list = json.loads(data)['list'] sys.stdout.write('\r') list_format = u'[{title}] by {author} | {player} {min:02}:{sec:02}' print u'{0:*^60}'.format(u'悦读FM.倾听文字的声音') print u'总共%d期.最新10期:'%len(menu_list) for i in range(0,10): print i,list_format.format(**menu_list[i]) print u"\n输入序号下载,以','分开.'q'退出" while 1: usr_input = raw_input('Select(0-%d):'%(len(menu_list)-1)) if usr_input == 'q': print 'bye!' break try: li = map(int, usr_input.split(',')) except: print 'Input Error!' for i in li: if 0 <= i < len(menu_list): common.download(menu_list[i]['mp3'], _TARGET,\ menu_list[i]['title'], 'mp3', Referer='http://yuedu.fm/') article2Html(i, menu_list[i]['title'])
def article2Html(num, filename):
    """Fetch article #num from the item API and save its text body as a
    UTF-8 HTML file named <filename>.html under _TARGET."""
    raw = common.open_url(item_url % num)
    item = json.loads(raw)['item'][0]
    #common.download(item['bg'], _TARGET, filename, 'jpg')
    out_path = '%s/%s.html' % (_TARGET, filename)
    with codecs.open(out_path, 'w', 'utf-8') as out:
        out.write('<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>')
        out.write(item['text'])
        out.write('</body></html>')
def replays(url):
    """Scrape a fite.tv replay listing page: add a link for every free
    (price == '0') event found in its ld+json blocks, then add a
    directory entry for each 'Show more' pagination link."""
    items = []
    link = common.open_url(url)
    link = link.replace('\n', '')
    match = re.compile(
        r'<script type="application/ld\+json">(.+?)</script>').findall(link)
    for data in match:
        # Only blocks describing events carry a 'location' field.
        if 'location' not in data:
            continue
        data = json.loads(str(data))
        for i in data:
            # BUG FIX: initialize per-event fields. The original reused
            # whatever the previous event (or the function argument, for
            # `url`) left behind when a key was missing, and raised
            # NameError if the very first event lacked a field.
            event_url = date = price = img = title = None
            try:
                event_url = i['offers']['url']
            except (KeyError, TypeError):
                pass
            try:
                date = i['startDate'][:10]
            except (KeyError, TypeError):
                pass
            try:
                price = i['offers']['price']
            except (KeyError, TypeError):
                pass
            try:
                img = i['image']
            except (KeyError, TypeError):
                pass
            try:
                title = i['name']
            except (KeyError, TypeError):
                pass
            if event_url in items:
                continue
            items.append(event_url)
            title = '[B]%s[/B] | %s' % (date, title)
            if price == '0':
                common.add_link(title, event_url, 1, img)
    more = re.compile(
        '<div class="show-more"><a href="(.+?)">Show more').findall(link)
    for more_url in more:
        common.add_dir('[I]More...[/I]',
                       'https://www.fite.tv%s' % more_url, 2, base_img)
def test_price_sanity(self):
    """Cross-check Product.get_price() against the API node when present,
    otherwise against the scraped Amazon detail page."""
    price = self.product.get_price()
    node = self.browse_nodes.Items.Item.OfferSummary
    if hasattr(node, "LowestNewPrice"):
        our_price = node.FormattedPrice
    else:
        html_text = open_url(self.product.page_url).text
        soup = BeautifulSoup(html_text)
        our_price = soup.findAll("span", id="priceblock_ourprice")[0].text
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(our_price, price)
def download_pic(url): print url html = common.open_url(url) find_re = re.compile(r'<li id.+?<img src="(.+?)"', re.DOTALL) img_url = find_re.findall(html) print 'Start download %d pics'%len(img_url) for url in img_url: if url: filename,ext = os.path.splitext(os.path.split(url)[-1]) if not ext: ext = '.jpg' common.download(url, TARGET, filename, ext[1:], Referer=url)
def download_show(li):
    """For each show number in `li`, fetch its playlist XML and download
    every song into a per-show target directory."""
    for num in li:
        # Shows after #296 live at a different XML endpoint.
        url = xml_url_1 % num if num > 296 else xml_url_2 % num
        xml_data = common.open_url(url)
        if not xml_data:
            continue
        target_dir = TARGET % num
        for title, location in extract(xml_data):
            ext = location.split('.')[-1]
            common.download(location, target_dir, title, ext,
                            Referer=referer % num)
def _upload_image(self, img_url):
    """
    Read the binary img and let the XMLRPC library encode it into base64.

    :param img_url: The url to img file.
    :return: attachment_id.
    """
    binary = xmlrpc_client.Binary(open_url(img_url).content)
    payload = {
        'name': 'picture.jpg',
        'type': 'image/jpeg',
        'bits': binary,
    }
    response = self.client.call(media.UploadFile(payload))
    return response['id']
def test_review_sanity(self):
    """Cross-check Product.get_review() against the longest five-star
    review scraped from the product's review page."""
    review = self.product.get_review()
    five_stars_review_url = (
        "http://www.amazon.com/product-reviews/{0}/?ie="
        "UTF8&filterBy=addFiveStar".format(self.product.ASIN)
    )
    html_text = open_url(five_stars_review_url).text
    soup = BeautifulSoup(html_text)
    all_reviews = soup.findAll("span", "a-size-base review-text")
    if len(all_reviews) == 0:
        self.assertEqual("null", review)
        # BUG FIX: the original fell through and indexed all_reviews[0]
        # on an empty list, raising IndexError after a passing assert.
        return
    all_reviews = [R.text for R in all_reviews]
    all_reviews = sorted(all_reviews, key=lambda word: len(word),
                         reverse=True)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(all_reviews[0].encode("utf-8"), review)
def get_review(self):
    """
    Find the longest review with five stars.

    :return: Review - str.
    """
    # Serve the memoized value when a previous call already resolved it.
    if self.review:
        return self.review
    soup = BeautifulSoup(open_url(self.five_stars_review_url).text)
    spans = soup.findAll("span", "a-size-base review-text")
    if not spans:
        self.review = 'null'
        return self.review
    texts = sorted((span.text for span in spans),
                   key=lambda text: len(text), reverse=True)
    self.review = texts[0].encode('utf-8')
    return self.review
def __init__(self, product_item, browse_nodes):
    """Build a product wrapper from an Amazon API item node.

    Fetches and parses the product detail page immediately; the scraped
    fields (price, rating, review, ...) start empty and are presumably
    filled lazily by the get_* methods — confirm against callers.

    :param product_item: parsed API item (ASIN, DetailPageURL, ItemAttributes).
    :param browse_nodes: parsed browse-node response, kept for later lookups.
    """
    self.browse_nodes = browse_nodes
    self.ASIN = product_item.ASIN.text
    self.page_url = unquote(product_item.DetailPageURL.text)
    self.title = product_item.ItemAttributes.Title.text.encode('utf-8')
    html_text = open_url(self.page_url).text
    self.soup = BeautifulSoup(html_text)
    self.categories = []
    self.img_urls = {}
    self.price = ''
    self.rating = ''
    self.review = ''
    self.features = ''
    self.num_of_reviews = None
    self.five_stars_review_url = (
        'http://www.amazon.com/product-reviews/{0}/?ie='
        'UTF8&filterBy=addFiveStar&tag={1}'.format(
            self.ASIN, CONFIG['associate_tag'])
    )
    try:
        self.manufacturer = str(product_item.ItemAttributes.Manufacturer)
    except AttributeError:
        # BUG FIX: was a bare `except:` that swallowed every error
        # (including KeyboardInterrupt); only a missing Manufacturer
        # attribute is the expected failure here.
        self.manufacturer = 'null'
def main(): if len(sys.argv) < 3 or (sys.argv[1] != '-t' and len(sys.argv) > 3): help_info() return if sys.argv[1] == '-a': url = _albumUrl % sys.argv[2] elif sys.argv[1] == '-c': url = _collectUrl % sys.argv[2] elif sys.argv[1] == '-t': url = _trackUrl % ','.join(sys.argv[2:]) else : help_info() return content = common.open_url(url) if not content: return res = extract(content) for title,uri,lrc in res: common.download(uri,TARGET,title,'mp3') if lrc:
def main(): print u'无聊图请按1,妹纸图请按2, 其它自动挂机。' choice = raw_input('>') global TARGET if choice == '1': url_default = wuliao_default url_page = wuliao_page TARGET = os.path.join(TARGET, 'jandan-pic') elif choice == '2': url_default = meizi_default url_page = meizi_page TARGET = os.path.join(TARGET, 'jandan-ooxx') else: print 'bye!' return html = common.open_url(url_default) find_RE = re.compile(r'>\[(.+?)\]') result = find_RE.findall(html) cur_page = int(result[0]) print 'Current Page Number:%d'%cur_page cnt = int(raw_input('How many pages do you want to download? \n>')) for i in range(0,cnt): download_pic(url_page%(cur_page-i))
def test_get_price(self):
    """Cross-check Product.get_rating() against the scraped detail page.

    NOTE(review): despite the name, this verifies the *rating*, not the
    price — presumably a copy/paste slip. The name is kept so existing
    test-selection by id keeps working; consider renaming to
    test_rating_sanity.
    """
    rating = self.product.get_rating()
    html_text = open_url(self.product.page_url).text
    soup = BeautifulSoup(html_text)
    our_rating = soup.findAll("div", id="avgRating")[0].span.a.span.text[:3]
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(our_rating, rating)