from bs4 import BeautifulSoup
import requests
import util_midia

# `link` (the page being scraped) and `link_site` (the site's base URL, used
# below to resolve relative hrefs) are assumed to come from earlier cells not
# shown in this fragment.
req = requests.get(link)

# In[5]:

# Parse the page once and collect every card size used on the homepage.
bs = BeautifulSoup(req.text, "html.parser")
noticias = bs.find_all('div', class_='card large card-medium')
noticias += bs.find_all('div', class_='card card-small')
noticias += bs.find_all('div', class_='card card-xsmall')

# In[6]:

for noticia in noticias:
    ref_link = noticia.find_all('a', href=True)[0]['href']
    if link_site not in ref_link:
        # relative href: prepend the site base before posting
        util_midia.social_news_from_link(link_site + ref_link)
        print(link_site + ref_link)
    else:
        util_midia.social_news_from_link(ref_link)
        print(ref_link)

# In[7]:

noticias[0]
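# The `link_site not in ref_link` check above hand-rolls relative-URL
# resolution. A sketch of the same loop using the standard library's urljoin,
# which resolves relative hrefs against the page URL and leaves absolute ones
# untouched (a suggested alternative, not what the original notebook does):
from urllib.parse import urljoin

for noticia in noticias:
    ref_link = noticia.find_all('a', href=True)[0]['href']
    full_link = urljoin(link, ref_link)  # '/artigo' -> '<base>/artigo'
    util_midia.social_news_from_link(full_link)
    print(full_link)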
from bs4 import BeautifulSoup
import requests
import datetime
import util_midia

link = 'https://diplomatique.org.br/'
req = requests.get(link)
bs = BeautifulSoup(req.text, "html.parser")

# Every carousel after the first holds article cards; post the first link of each.
materias = bs.find_all('div', class_='owl-carousel')
for div in materias[1:]:
    try:
        page_link = div.find_all('a', href=True)[0]['href']
        util_midia.social_news_from_link(page_link)
    except IndexError:
        # carousel with no anchors; skip it
        continue
    # Legacy NewsPlease fallback, kept commented out:
    # article = NewsPlease.from_url(page_link)
    # print(article.title)
    # row = {'titulos': [], 'links': [], 'noticia': [], 'image': [], 'abstract': [], 'date': []}
    # if article is not None:
    #     row['titulos'].append(article.title)
    #     row['noticia'].append(article.text)
    #     row['links'].append(article.url)
    #     row['abstract'].append(article.text)
    #     row['date'].append(article.date_publish)
    #     path_image = article.image_url
    #     if path_image == '' or path_image is None:
    #         row['image'].append(0)
    #     else:
    #         row['image'].append(download_and_move_image(article.image_url))
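# All of these scrapers delegate to util_midia.social_news_from_link, whose
# implementation is not part of this fragment. A minimal, hypothetical sketch
# of what it plausibly does, reconstructed from the commented-out NewsPlease
# fallback above (field names mirror that block; persisting the row is left
# out because the storage API is not shown):
from newsplease import NewsPlease

def social_news_from_link(page_link):
    """Sketch only: fetch an article with NewsPlease and collect the
    fields the commented-out fallback above collected."""
    article = NewsPlease.from_url(page_link)
    if article is None:
        return None
    path_image = article.image_url
    return {
        'titulos': article.title,
        'noticia': article.text,
        'links': article.url,
        'abstract': article.text,
        'date': article.date_publish,
        # 0 stands in for "no image", as in the original fallback
        'image': path_image if path_image else 0,
    }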
from lexical_analyzer_package import midia_lexical
from midia_postagem import midia_post
from Model.News import News
from Database import midia_table
from newsplease import NewsPlease
from bs4 import BeautifulSoup
import requests
import util_midia
import datetime

# In[2]:

link = 'http://www.ebc.com.br'

# In[3]:

req = requests.get(link)

# In[4]:

noticias = BeautifulSoup(req.text, "html.parser").find_all(
    'div', class_='cmpGeneric isoGrid-item col-lg-4 col-md-4 col-sm-12 col-xs-12')

# In[5]:

for noticia in noticias:
    # post each card's first link
    ref_link = noticia.find_all('a', href=True)[0]['href']
    print(ref_link)
    util_midia.social_news_from_link(ref_link)
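# Searching class_ with the full multi-class string matches only tags whose
# class attribute is exactly that string, in that order; any extra or
# reordered class on the site silently breaks the match. A CSS-selector
# variant that keys on the two distinguishing classes instead (a suggested
# alternative, not the original selector):
noticias = BeautifulSoup(req.text, "html.parser").select('div.cmpGeneric.isoGrid-item')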
from bs4 import BeautifulSoup
import requests
import util_midia

link = 'https://operamundi.uol.com.br/'
req = requests.get(link)
bs = BeautifulSoup(req.text, "html.parser")

# Collect article links from every grid section on the homepage.
materias = bs.find('div', class_='news-grid').find_all('a', href=True)
materias += bs.find('div', class_='col-xs-12 col-md-6 home-grid').find_all('a', href=True)
materias += bs.find('div', class_='col-xs-12 news-grid').find_all('a', href=True)
materias += bs.find('div', class_='row news-grid').find_all('a', href=True)

for materia in materias:
    util_midia.social_news_from_link(materia['href'])
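# Each bs.find(...) above returns None when its grid is absent from the page,
# and the chained find_all then raises AttributeError. A small defensive
# variant (a sketch; it changes behavior slightly by skipping missing grids
# instead of crashing):
def links_in(bs, class_name):
    """hrefs inside the first div with class_name, or [] if it is missing."""
    container = bs.find('div', class_=class_name)
    return container.find_all('a', href=True) if container else []

materias = []
for cls in ('news-grid', 'col-xs-12 col-md-6 home-grid',
            'col-xs-12 news-grid', 'row news-grid'):
    materias += links_in(bs, cls)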
from lexical_analyzer_package import midia_lexical
from midia_postagem import midia_post
from Model.News import News
from Database import midia_table
from newsplease import NewsPlease
from bs4 import BeautifulSoup
import requests
import util_midia

# In[2]:

link = 'https://extra.globo.com/noticias/plantao.html'

# In[3]:

req = requests.get(link)

# In[4]:

noticias = BeautifulSoup(req.text, "html.parser").find_all('div', class_='text')

# In[ ]:

for noticia in noticias:
    # post each card's first link
    ref_link = noticia.find_all('a', href=True)[0]['href']
    print(ref_link)
    util_midia.social_news_from_link(ref_link)