def obtain_speech_text(speech_list, soup):
    '''Scrape every speech page in ``speech_list`` and record its contents.

    For each relative URL the page is fetched through the ``bs`` helper
    (called with the absolute URL and the value returned by
    ``randomUserAgents()``), and a ``(speech_url, title, date, body)`` tuple
    is appended to the ``presidential_speech`` list defined outside this
    function. Nothing is returned.

    Args:
        speech_list: Iterable of speech URLs; each one is joined onto the
            outer-scope ``url`` with ``urljoin``.
        soup: Never read by this function; the name is only kept in the
            signature so existing callers keep working.

    Notes:
        * The date is parsed with ``'%A %d de %B de %Y'``. ``%A`` and ``%B``
          follow the active locale, so parsing depends on the process
          locale matching the site's language. When ``strptime`` raises
          ``ValueError`` the date silently becomes ``datetime(1990, 1, 1)``.
        * A page lacking the ``<h2>``, ``<time class="pull-right">`` or
          ``<article>`` element will raise ``AttributeError`` -- assuming
          ``bs`` returns a BeautifulSoup-like object whose ``find`` yields
          ``None`` on a miss (TODO confirm against the helper).
    '''
    date_format = '%A %d de %B de %Y'

    for speech_url in tqdm(speech_list):
        # Absolute URL first, then a fresh header value for this request.
        page = bs(urljoin(url, speech_url), randomUserAgents())

        heading = page.find("h2").text
        raw_date = page.find("time", {"class": "pull-right"}).text.strip()

        try:
            published = datetime.strptime(raw_date, date_format)
        except ValueError:
            # Sentinel used whenever the date text cannot be parsed.
            published = datetime(1990, 1, 1)

        text = page.find("article").text.strip()
        presidential_speech.append((speech_url, heading, published, text))
Exemplo n.º 2
0
import time
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # for necessary browser action
from selenium.webdriver.common.by import By  # For selecting html code
import lxml

from userAgents import user_agents, randomUserAgents

# WebDriverException is the base class of Selenium's own errors (element not
# found, not clickable, stale, ...). Imported here so this script section is
# self-contained.
from selenium.common.exceptions import WebDriverException

# Wall-clock start of the run; it is not read again in this section.
start = time.time()

url = 'https://play.google.com/store/apps/collection/topselling_free'
# Value produced by the project's userAgents helper; not used in this section.
head = randomUserAgents()

driver = webdriver.Chrome()
driver.get(url)
time.sleep(2)  # let the first batch of content render

# Scroll to the bottom repeatedly so the page keeps loading more entries, and
# press the "show more" button whenever one is present.
for i in range(0, 60):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
    try:
        # find_element(By..., ...) works on Selenium 3 and 4; the older
        # find_element_by_css_selector helper was removed in Selenium 4.
        showMore = driver.find_element(By.CSS_SELECTOR, '#show-more-button')
        showMore.click()
    except WebDriverException:
        # Best effort: the button is usually absent or not clickable yet.
        # Only Selenium errors are ignored, so Ctrl-C and genuine bugs
        # (e.g. NameError) are no longer swallowed.
        continue

pageSource = driver.page_source
# Name the parser explicitly: avoids bs4's "no parser was explicitly
# specified" warning and keeps the parse tree independent of which parsers
# happen to be installed. lxml is already imported by this file.
bs = BeautifulSoup(pageSource, 'lxml')
count = 0
Exemplo n.º 3
0
import time
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.common.keys import Keys   # for necessary browser action
from selenium.webdriver.common.by import By    # For selecting html code
import lxml

from userAgents import user_agents, randomUserAgents

# WebDriverException is the base class of Selenium's own errors (element not
# found, not clickable, stale, ...). Imported here so this script section is
# self-contained.
from selenium.common.exceptions import WebDriverException

# Wall-clock start of the run; it is not read again in this section.
start = time.time()

url = 'https://play.google.com/store/apps/collection/topselling_free'
# Value produced by the project's userAgents helper; not used in this section.
head = randomUserAgents()

driver = webdriver.Chrome()
driver.get(url)
time.sleep(2)  # let the first batch of content render

# Scroll to the bottom repeatedly so the page keeps loading more entries, and
# press the "show more" button whenever one is present.
for i in range(0, 60):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
    try:
        # find_element(By..., ...) works on Selenium 3 and 4; the older
        # find_element_by_css_selector helper was removed in Selenium 4.
        showMore = driver.find_element(By.CSS_SELECTOR, '#show-more-button')
        showMore.click()
    except WebDriverException:
        # Best effort: the button is usually absent or not clickable yet.
        # Only Selenium errors are ignored, so Ctrl-C and genuine bugs
        # (e.g. NameError) are no longer swallowed.
        continue

pageSource = driver.page_source
# Name the parser explicitly: avoids bs4's "no parser was explicitly
# specified" warning and keeps the parse tree independent of which parsers
# happen to be installed. lxml is already imported by this file.
bs = BeautifulSoup(pageSource, 'lxml')
count = 0