def check(self) -> str:
    """Probe network connectivity by opening a connection to YouTube.

    Stores the result in ``self.conn`` and returns it: the response
    object's repr on success, or the error's repr on failure.
    """
    self.conn = ""
    url = "https://www.youtube.com"
    timeout = 5
    try:
        # op is urllib.request.urlopen (aliased at file level).
        response = op(url, timeout=timeout)
        self.conn = str(response)
    except OSError as exc:
        # BUG FIX: the original bare `except:` re-read `response`,
        # which is unbound when op() itself raises, so every failure
        # turned into a NameError. Record the actual error instead.
        self.conn = str(exc)
    return self.conn
def Update():
    """Scrape the flyingatom.com Bitcoin-ATM table.

    Returns a list of dicts with keys ``Miasto`` (city), ``Ulica``
    (street), ``Wartosc`` (value) and ``URL_Mapa`` (map link),
    sorted by city name.
    """
    # Context manager closes the connection even if read() fails
    # (the original closed it manually and would leak on error).
    with op("https://flyingatom.com/bankomaty-bitcoin") as conn:
        page = conn.read()

    p_soup = soup(page, "html.parser")
    # Assumes the first <tbody> on the page is the ATM table —
    # TODO confirm against the live markup.
    table_body = p_soup.find_all("tbody")[0]

    city_info = []
    for row in table_body.find_all('tr'):
        cells = row.find_all("td")
        # Column 2 is intentionally skipped, matching the original.
        city_info.append({
            "Miasto": cells[0].text,
            "Ulica": cells[1].text,
            "Wartosc": cells[3].text,
            "URL_Mapa": cells[4].a['href'],
        })
    return sorted(city_info, key=itemgetter('Miasto'))
from urllib.request import urlopen as op
import bs4 as bs

url = 'http://www.nu.nl'

# Download the front page, closing the connection when done.
with op(url) as resp:
    payload = resp.read()

soup = bs.BeautifulSoup(payload, 'lxml')

# Notes for navigating the site:
# - use the nav element to move around the website
# - target a specific class via soup.find_all('<tag>', class_='<someclass>')
# - hyperlinks are embodied by <a> tags

# Print the list inside the first-column div.
column = soup.find('div', class_='column first')
print(column.ul)

# with open("C:/Users/David.MIDDENAARDE/Documents/web-scraping_output.txt", 'w') as doc:
#     for paragraph in soup.find_all('div', class_='block headline'):
#         #pass
#         doc.write(paragraph.text)
#         print(paragraph)
#         print(paragraph.get('href'))
#         print(paragraph.a.get('href'))

#print(soup.get_text())
from urllib.request import urlopen as op

# Fetch three pages; only the third is printed, but the first two
# requests are kept for parity with the commented-out prints below.
html = op("http://pythonscraping.com/pages/page1.html")
html2 = op("http://finc8888.pythonanywhere.com")
html3 = op("https://time100.ru/Moscow")
try:
    #print(html.read())
    #print(html2.read())
    print(html3.read())
finally:
    # BUG FIX: the original leaked all three response objects —
    # close them even if a read() above raises.
    html.close()
    html2.close()
    html3.close()
from urllib.request import urlopen as op
from bs4 import BeautifulSoup as soup

url = "https://brokers.ru/grafik-foreks-online/aud-usd-kurs"

# Context manager closes the connection even if read() fails
# (the original closed it manually and would leak on error).
with op(url) as page:
    data = page.read()

soup1 = soup(data, "html.parser")
# NOTE(review): class_ expects a class NAME, but this string is a CSS
# selector (the dot separates two classes), so find() can never match.
# The likely intent was:
#     soup1.select_one("span.tv-widget-chart__price-value.symbol-last")
# Left as-is because `div` is unused and only `data` is printed.
div = soup1.find('span', class_="tv-widget-chart__price-value.symbol-last")
print(data)