def urlretrieve(url, filename):
    """Download *url* to *filename* (the name is percent-decoded first).

    Any download error is printed rather than raised; a "Done" line is
    printed afterwards either way (NOTE(review): matches the collapsed
    original, where the completion print sits at function level).
    """
    local_name = unquote(filename)
    try:
        ur(url, local_name)
    except Exception as err:
        print(err)
    print("Done : " + local_name)
def downloadimg(articles):
    """Fetch each PTT article in *articles* and save its imgur images.

    For every article a subdirectory of ``download/`` named after the
    article title is created (if missing); each unique imgur link found in
    the thread HTML is downloaded into it under its imgur basename.
    """
    for post in articles:
        print(post.text, post['href'])
        target_dir = os.path.join('download', post.text)
        if not os.path.isdir(target_dir):
            os.mkdir(target_dir)
        res = requests.get('https://www.ptt.cc' + post['href'])
        found = reg_imgur_file.findall(res.text)
        print(found)
        for img_url in set(found):
            # imgur basename (e.g. abc123.jpg) doubles as the local file name
            fname = re.search('http[s]?://[i.]*imgur.com/(\w+\.(?:jpg|png|gif))', img_url).group(1)
            print(fname)
            ur(img_url, os.path.join('download', post.text, fname))
def get_top_machts():
    """Scrape football.kulichki.net for the current top matches.

    Returns a newline-separated string: rows containing a <span> become
    flag/header lines (via ``plus_flag``); other rows are parsed into
    ``*HH:MM* _match name_`` with the hour shifted forward by 2 (mod 24).
    """
    html = ur('http://football.kulichki.net/')
    page = b(html.read())
    table = page.find('div', {"class": 'col2 inl vtop'}).center.table
    rows = table.find_all('tr')
    result = ''
    for row in rows[1:]:
        span = row.find('span')
        if span is not None:
            # header row: decorated league/country line
            flag = plus_flag(span.text)
            plus_flag(flag)
            result = result + flag + '\n'
        else:
            text = row.find('p', {"align": "left"}).text
            text = text.replace('\n', '')
            text = text.replace(' ', ' ')
            # chars 1..'.'  hold the kick-off hour digits; shift them +2h
            matchtime = text[1:text.index('.')]
            shifted = str((int(matchtime[:2]) + 2) % 24)
            if len(shifted) == 1:
                shifted = '0' + shifted
            matchname = text[text.index('.') + 2:text.rindex('-')]
            result = result + '*' + shifted + matchtime[2:] + '* _' + matchname + '_\n'
    return result
async def nine_nine(ctx, code: str):
    """Send the translated wiki stat table for the requested character.

    Only handles ``"Lucia Crimson Abyss"``: fetches her biligame wiki page,
    machine-translates every cell of the first ``wikitable`` and sends the
    text to *ctx* in code-block messages of three cells each.

    NOTE(review): a trailing partial batch (cell count not divisible by 3)
    is never sent — this matches the original behavior.
    """
    if code == "Lucia Crimson Abyss":
        translator = Translator()
        my_url = "https://wiki.biligame.com/zspms/%E9%9C%B2%E8%A5%BF%E4%BA%9A%C2%B7%E6%B7%B1%E7%BA%A2%E4%B9%8B%E6%B8%8A"
        client = ur(my_url)
        raw_html = client.read()
        client.close()
        page = soup(raw_html, "html.parser")
        left_panels = page.findAll("div", {"class": "tj-left"})
        tables = left_panels[0].findAll("table", {"class": "wikitable"})
        cells = tables[0].findAll("td")
        seen = 0
        batch = ""
        for cell in cells:
            translated = translator.translate(str(cell.text))
            batch += translated.text + "\n"
            seen += 1
            if seen % 3 == 0:
                await ctx.send("```{}```".format(batch))
                batch = ""
# NOTE(review): fragment — this chunk begins mid-way through a
# cur.execute('''CREATE TABLE ...''') call (the Mobile table whose tail is
# the 'rating' column) and ends inside an unfinished try: block, so it is
# not runnable in isolation. Visible behavior: (re)create the very wide
# Specs table, read the highest Mobile id to derive the next one, then crawl
# Flipkart listing pages 1..19, parse each product page's spec rows into the
# specs1 dict (label -> value), and skip products whose "Model Number" is
# already stored in Specs.
# NOTE(review): the line break inside the single-quoted 'Model Number'
# string literal below (split across the chunk boundary) looks like
# extraction damage — confirm against the original file before running.
'rating' DECIMAL(2,1) )''') cur.execute('''CREATE TABLE IF NOT EXISTS Specs( 'id' TEXT,"In The Box" TEXT, "Model Number" TEXT, "Model Name" TEXT, "Color" TEXT, "Browse Type" TEXT, "SIM Type" TEXT, "Hybrid Sim Slot" TEXT, "Touchscreen" TEXT, "OTG Compatible" TEXT, "Sound Enhancements" TEXT, "Display Size" TEXT, "Resolution" TEXT, "Resolution Type" TEXT, "GPU" TEXT, "Display Colors" TEXT, "Other Display Features" TEXT, "Operating System" TEXT, "Processor Type" TEXT, "Processor Core" TEXT, "Primary Clock Speed" TEXT, "Operating Frequency" TEXT, "Internal Storage" TEXT, "RAM" TEXT, "Expandable Storage" TEXT, "Supported Memory Card Type" TEXT, "Memory Card Slot Type" TEXT, "Primary Camera Available" TEXT, "Primary Camera" TEXT, "Primary Camera Features" TEXT, "Secondary Camera Available" TEXT, "Secondary Camera" TEXT, "Secondary Camera Features" TEXT, "Flash" TEXT, "HD Recording" TEXT, "Full HD Recording" TEXT, "Video Recording" TEXT, "Video Recording Resolution" TEXT, "Frame Rate" TEXT, "Dual Camera Lens" TEXT, "Phone Book" TEXT, "Network Type" TEXT, "Supported Networks" TEXT, "Internet Connectivity" TEXT, "3G" TEXT, "Pre-installed Browser" TEXT, "Micro USB Port" TEXT, "Bluetooth Support" TEXT, "Bluetooth Version" TEXT, "Wi-Fi" TEXT, "Wi-Fi Hotspot" TEXT, "USB Connectivity" TEXT, "Audio Jack" TEXT, "Map Support" TEXT, "GPS Support" TEXT, "Smartphone" TEXT, "SIM Size" TEXT, "User Interface" TEXT, "Removable Battery" TEXT, "SMS" TEXT, "Graphics PPI" TEXT, "Sensors" TEXT, "Other Features" TEXT, "Important Apps" TEXT, "FM Radio" TEXT, "Audio Formats" TEXT, "Video Formats" TEXT, "Battery Capacity" TEXT, "Width" TEXT, "Height" TEXT, "Depth" TEXT, "Weight" TEXT, "Warranty Summary" TEXT, "Quick Charging" TEXT, "Display Type" TEXT, "Wi-Fi Version" TEXT, "Infrared" TEXT, "Secondary Clock Speed" TEXT, "USB Tethering" TEXT, "MMS" TEXT, "Video Call Support" TEXT, "Micro USB Version" TEXT, "Battery Type" TEXT, "3G Speed" TEXT, "GPRS" TEXT, "EDGE" TEXT, "GPRS Features" TEXT, "EDGE 
Features" TEXT, "Voice Input" TEXT, "SIM Access" TEXT, "Call Log Memory" TEXT, "Speaker Phone" TEXT, "Speed Dialing" TEXT, "WAP" TEXT, "WAP Version" TEXT, "Touchscreen Type" TEXT, "Predictive Text Input" TEXT, "User Memory" TEXT, "Supported Languages" TEXT, "Browser" TEXT, "FM Radio Recording" TEXT, "NFC" TEXT, "Keypad Type" TEXT, "Dual Battery" TEXT, "Business Phone" TEXT, "Image Editor" TEXT, "Call Wait/Hold" TEXT, "Conference Call" TEXT, "Hands Free" TEXT, "Call Divert" TEXT, "Call Timer" TEXT, "Social Networking Phone" TEXT, "Instant Message" TEXT, "Series" TEXT, "Phone Book Memory" TEXT, "SMS Memory" TEXT, "JAVA Support" TEXT, "Games" TEXT, "Optical Zoom" TEXT, "Call Records" TEXT, "Logs" TEXT, "Keypad" TEXT, "Music Player" TEXT, "Warranty Service Type" TEXT, "Digital Zoom" TEXT, "Mini USB Port" TEXT, "Mini HDMI Port" TEXT, "HD Game Support" TEXT, "Total Memory" TEXT, "Hot SWAP Support" TEXT, "Mini USB Version" TEXT, "TV Out" TEXT, "Mobile Tracker" TEXT, "Java Application" TEXT, "Ringtones Format" TEXT, "Domestic Warranty" TEXT, "International Warranty" TEXT, "Covered in Warranty" TEXT, "Talk Time" TEXT, "Upgradable Operating System" TEXT, "DLNA Support" TEXT, "Not Covered in Warranty" TEXT, "Additional Content" TEXT)''') cur.execute('''SELECT * FROM Mobile ORDER BY id DESC LIMIT 1''') idnew = int(cur.fetchone()[0]) + 1 conn.commit() cur.close() url1 = 'https://www.flipkart.com/search?sid=tyy%2C4io&otracker=CLP_Filters&page=' url2 = 'https://www.flipkart.com' for i in range(1, 20): conn = sqlite3.connect('mobiles.sqlite') cur = conn.cursor() html1 = ur(url1+str(i), context=ctx).read() soup1 = bs(html1,'html.parser') lst1 = soup1.findAll('a',{'class':'_31qSD5'}) for a in lst1: html1 = ur(url2+a['href'], context=ctx).read() soup1 = bs(html1, 'html.parser') lst2 = soup1.findAll('tr',{'class':'_3_6Uyw row'}) specs1 = dict() for li in lst2: specs1[li.find(class_='_3-wDH3 col col-3-12').get_text()]=li.find(class_='_3YhLQA').get_text() if(specs1.get('Model 
Number',0)): cur.execute('SELECT * FROM Specs WHERE "Model Number"=?', (specs1['Model Number'], )) if cur.fetchone(): continue try: lst = re.findall('^\S(\S+),(\S+)',a.find(class_='_1vC4OE _2rQ-NK').get_text())[0] lst = lst[0]+lst[1]
async def nine_nine(ctx):
    """Post the 6-star weapon menu, then reply with details of the weapon
    named in the next message the bot receives.

    Flow: send the fixed menu to the hard-coded channel, wait for any
    message (the check accepts every message — a Message object is always
    truthy), look its content up in the ``Arms`` MySQL table, scrape the
    row's wiki link for the weapon image, and send an embed with the
    weapon's type, rarity (as stars) and skills.

    NOTE(review): an unknown weapon name makes ``myresult[0]`` raise
    IndexError — presumably acceptable to the caller; confirm upstream.
    """
    channel = bot.get_channel(615955484347990019)
    names = """ 1. Shadow Strike-Reverse 2. Zero Degree Calibration 3. Chimera Retrograde 4. Zero Form 5. Sirius 6. Red Sakura 7. Ray Mill 8. God Giver 9. Dragon Wind 10. Wei Zi 11. Soul Slayer 12. Frenzied Fusion Canon 13. Leitning 14. Great God Power 15. Calender 16. Red Lotus Fanatic 17. Machine Rhyme 18. Wolf Eater 19. Nuclear-based red dragon 20. Sain 21. Pitch Black"""
    choice_embed = discord.Embed(
        title="Following are the weapons in Punishing Gray Raven which come under 6-star rarity:",
        description=names)
    await channel.send(embed=choice_embed)

    def check(m):
        # accept any message object
        return m

    msg = await bot.wait_for('message', check=check)
    title = msg.content
    mydb = mysql.connector.connect(host="localhost",
                                   user="******",
                                   passwd=password,
                                   database="Punishing Gray Raven",
                                   auth_plugin='mysql_native_password')
    mycursor = mydb.cursor()
    # FIX: parameterized query — the original interpolated the user's
    # message straight into the SQL string (injection risk, and it broke on
    # names containing quotes).
    mycursor.execute("SELECT * FROM Arms where Name = %s;", (title,))
    myresult = mycursor.fetchall()
    name = myresult[0][0]
    link = myresult[0][1]
    uClient = ur(link)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")
    con = page_soup.findAll("table", {"class": "wikitable"})
    ans = con[0].findAll("img", {"class": "img-kk"})
    url = ans[0]["src"]
    wtype = myresult[0][2]
    rare = myresult[0][3]
    rr = "✰" * rare  # rarity rendered as stars
    skill = myresult[0][4]
    embed = discord.Embed(title=name, description=" Details", color=0x00ff00)
    embed.set_thumbnail(url=url)
    embed.add_field(name="Type", value=wtype, inline=False)
    embed.add_field(name="Rarity", value=rr, inline=False)
    embed.add_field(name="Skills", value=skill, inline=False)
    await channel.send(embed=embed)
async def nine_nine(ctx):
    """Post the 6-star memory menu, then reply with details of the memory
    named in the next message the bot receives.

    Flow: send the fixed menu to the hard-coded channel, wait for any
    message (the check accepts every message — a Message object is always
    truthy), look its content up in the ``Memory`` MySQL table, scrape the
    row's wiki link for the memory image, and send an embed with rarity
    (as stars), 2/4-piece set effects and HP/CRIT/ATK/DEF stats.

    NOTE(review): an unknown memory name makes ``myresult[0]`` raise
    IndexError — presumably acceptable to the caller; confirm upstream.
    """
    channel = bot.get_channel(615955484347990019)
    names = """ 1. Guinnea 2. Hannah 3. Einstein 4. Frederick 5. Adolf 6. Condelina 7. Leonardo 8. Shakespeare 9. Patton 10. Catherine 11. Heisen 12. Basilone 13. Fellete 14. Darwin"""
    choice_embed = discord.Embed(
        title="Following are the memories in Punishing Gray Raven which come under 6-star rarity:",
        description=names)
    await channel.send(embed=choice_embed)

    def check(m):
        # accept any message object
        return m

    msg = await bot.wait_for('message', check=check)
    title = msg.content
    mydb = mysql.connector.connect(host="localhost",
                                   user="******",
                                   passwd=password,
                                   database="Punishing Gray Raven",
                                   auth_plugin='mysql_native_password')
    mycursor = mydb.cursor()
    # FIX: parameterized query — the original interpolated the user's
    # message straight into the SQL string (injection risk, and it broke on
    # names containing quotes).
    mycursor.execute("SELECT * FROM Memory where Name = %s;", (title,))
    myresult = mycursor.fetchall()
    name = myresult[0][0]
    link = myresult[0][1]
    uClient = ur(link)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")
    con = page_soup.findAll("table", {"class": "wikitable"})
    ans = con[0].findAll("img", {"class": "img-kk"})
    url = ans[0]["src"]
    rarity = myresult[0][2]
    two_piece_eff = myresult[0][3]
    four_piece_eff = myresult[0][4]
    hp = myresult[0][5]
    crit = myresult[0][6]
    atk = myresult[0][7]
    Def = myresult[0][8]
    rr = "✰" * rarity  # rarity rendered as stars
    embed = discord.Embed(title=name, description=" Details", color=0x00ff00)
    embed.set_thumbnail(url=url)
    embed.add_field(name="Rarity", value=rr, inline=False)
    embed.add_field(name="2-piece effect", value=two_piece_eff, inline=False)
    embed.add_field(name="4-piece effect", value=four_piece_eff, inline=False)
    embed.add_field(name="HP", value=hp, inline=True)
    embed.add_field(name="CRIT", value=crit, inline=True)
    embed.add_field(name="ATK", value=atk, inline=True)
    embed.add_field(name="DEF", value=Def, inline=True)
    await channel.send(embed=embed)
# NOTE(review): fragment — starts mid-function (t1, soup, url, companies,
# titles, tech, techFound, linkCount and now_next are all defined above this
# view) and ends inside an if-condition, so it is not runnable in isolation.
# Visible behavior: pair up job links/locations/companies, skip postings
# already seen by (company, title), arm a 5-second signal.alarm watchdog,
# fetch each posting, and scan its <li> items (lower-cased, punctuation
# stripped) for known technology keywords via the pairwise now_next helper.
t2 = soup.findAll("div", {"class": "location"}) t3 = soup.findAll("span", {"class": "company"}) for link, loc, comp in zip(t1, t2, t3): absolute_link = urljoin(url, link.get("href")) company = comp.text.strip() title = link.get("title") location = loc.text if (company in companies and title in titles): # get rids of duplicate/spam job posting continue titles.append(title) companies.append(company) signal.alarm(5) try: uClient = ur(absolute_link) #opens site, and gets page pageText = uClient.read() # html uClient.close() #closes sites linkCount += 1 pageSoup = bs(pageText, "html.parser") #html parser print(linkCount) intro = pageSoup.findAll("p") # job description middle = pageSoup.findAll("li") # tech for i in middle: if (str(i)[3] == ' '): pass else: line = (str(i)[4:-5]).lower() line = re.sub(r"[^a-zA-Z0-9#]+", ' ', line) for t1, t2 in now_next(line.split()): if (t1 in tech and techFound[t1] == False):
# Flipkart "boat headphones" search scraper: fetch the results page, then
# print a numbered name/price listing for each product card found.
# NOTE(review): this chunk appears truncated — `rating` is computed but
# never printed within the visible lines, and `product_number` is never
# incremented here; the loop tail presumably continues past this view.
from bs4 import BeautifulSoup as bs from urllib.request import urlopen as ur web_url = 'https://www.flipkart.com/search?q=boat+headphones&sid=0pm%2Cfcn&as=on&as-show=on&otracker=AS_QueryStore_OrganicAutoSuggest_1_15_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_1_15_na_na_na&as-pos=1&as-type=RECENT&suggestionId=boat+headphones%7CHeadphones+%26+Earphones&requestId=493a648d-a9ab-4451-8dcd-0ad615e7ea65&as-searchtext=%20boat%20headphones' with ur(web_url) as url: page_html = url.read() soup = bs(page_html, 'html.parser') containers = soup.findAll('div', {'class': '_3liAhj'}) print('Total count is ', len(containers)) #print(bs.prettify(containers[0])) product_number = 1 for container in containers: name = container.div.img['alt'] product_price = container.findAll('div', {'class': '_1vC4OE'}) spliting_price = product_price[0].text.replace(',', '').split('₹') price = 'Rs.' + spliting_price[1] product_rating = container.findAll('div', {'class': 'hGSR34'}) rating = product_rating[0].text print('Product No.:', product_number) print('Name : ' + name) print('Price : ' + price)
# Indeed salary scraper: build a search URL from a 2- or 3-word job title,
# fetch the results page, and locate the average/min/max salary widgets.
# NOTE(review): fragment — ends mid-way through `for i in avg.text:` (the
# digit-extraction loop presumably continues past this view), so it is not
# runnable in isolation. `jobList` is created but unused in the visible
# lines; `bs` and `ur` are imported elsewhere in the original file.
from urllib.parse import urljoin jobList = list() job = input("Enter job title:") jobSplit = job.split(" ") if (len(jobSplit) == 2): url = "https://www.indeed.com/jobs?q={}+{}&start=".format( jobSplit[0], jobSplit[1]) elif (len(jobSplit) == 3): url = "https://www.indeed.com/jobs?q={}+{}+{}&start=".format( jobSplit[0], jobSplit[1], jobSplit[2]) else: raise Exception('Invalid job title') uClient = ur(url) pageText = uClient.read() uClient.close() pageSoup = bs(pageText, "html.parser") avg = pageSoup.find(attrs={"id": "univsrch-salary-currentsalary"}) minimum = pageSoup.find( attrs={"class": "univsrch-sal-min univsrch-sal-caption float-left"}) maximum = pageSoup.find( attrs={"class": "univsrch-sal-max univsrch-sal-caption float-right"}) avgAmount = "" minAmount = "" maxAmount = "" for i in avg.text:
# Newegg graphics-card scraper: fetch the search results page, parse each
# "item-container" card, and print brand / product name / shipping cost.
# A CSV header is written to scrap1_newegg.csv before the loop.
# NOTE(review): likely truncated — within the visible lines the loop only
# prints; no per-row f.write(...) or f.close() appears, so the file tail
# presumably continues past this view. The open() call also lacks an
# explicit encoding — confirm against the original file.
# -*- coding: utf-8 -*- """ Created on Mon Jul 9 19:35:29 2018 @author: Shanmukha """ from urllib.request import urlopen as ur from bs4 import BeautifulSoup as BS my_url='https://www.newegg.com/global/in/Product/ProductList.aspx?Submit=ENE&DEPA=0&Order=BESTMATCH&Description=graphics+card&ignorear=0&N=-1&isNodeId=1' #opening up connection and grabing the information uclient=ur(my_url) page_html=uclient.read() uclient.close() #html parsing page_soup=BS(page_html,"html.parser") #grab each product information containers=page_soup.findAll("div",{"class":"item-container"}) filename="scrap1_newegg.csv" f=open(filename,"w") headers="brand,product_name,shipping\n" f.write(headers) for container in containers: brand=container.div.div.a.img['title'] title_container=container.findAll("a",{"class":"item-title"}) product_name=title_container[0].text shipping_container=container.findAll("li",{"class":"price-ship"}) shipping=shipping_container[0].text.strip() print("brand: "+ brand) print("product_name: "+ product_name) print("shipping:"+ shipping)
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 27 14:02:56 2016

@author: SRINIVAS

Ask the VirusTotal v2 API to rescan two previously-submitted file hashes
and print the raw JSON response body.
"""
import simplejson
import urllib
from urllib.parse import urlencode
from urllib.request import Request as ur
from urllib.request import urlopen as ure

url = "https://www.virustotal.com/vtapi/v2/file/rescan"
parameters = {"resource": "99017f6eebbac24f351415dd410d522d, 7896b9b34bdbedbe7bdc6d446ecb09d5",
              "apikey": "069ee921004cfe52f111a8c8d382ab20cd678b902ee303b8e0b9b7aa7b9053f7"}
# FIX: the original did `import urllib.request.Request as ur` (fails at
# import time — Request is a class, not a module) and called
# `urllib.urlencode`, which does not exist on Python 3. Use
# urllib.parse.urlencode and encode the form body to bytes, as
# urllib.request.urlopen requires for a POST.
data = urlencode(parameters).encode("ascii")
req = ur(url, data)
response = ure(req)
json = response.read()  # raw bytes of the JSON reply (name kept from original)
print(json)
# NOTE(review): fragment — starts mid-scope (req, fmat, name, url, d/m/y and
# the BeautifulSoup/r imports are defined above this view) and contains an
# unmatched ''' delimiter, so it is not runnable in isolation. Visible
# behavior: locate a comic image URL in the fetched page and download it via
# ur(...); the loop below retries up to 500 dates, creating the destination
# directory as needed, swallowing (but printing) per-date errors, and
# rolling the day/month/year counters forward in the finally block.
print(req) print(req.text) bs = BeautifulSoup(req.text, 'html5lib') div = bs.find('div', id='js-item-start') print(div) ur(div.attrs['data-image'], fmat(name)) ''' for i in range(500): try: f = '/'.join(fmat(name).split('/')[:-1]) if not os.path.isdir(f): os.makedirs(f) #ur(BeautifulSoup(r.get(fmat(url)).text, 'html5lib').find('div', id='js-item-start').attrs['data-image'], fmat(name)) ur( BeautifulSoup(r.get(fmat(url)).text, 'html5lib').find( 'picture', class_='item-comic-image').find('img').attrs['src'], fmat(name)) print('saved to', fmat(name)) except Exception as e: # raise e print(f'got {e}, trying a mod') finally: d = str(int(d) + 1) if int(d) > 31: d = '1' m = str(int(m) + 1) if int(m) > 12: m = '1' y = str(int(y) + 1)