def page_scraper(ib_exchange_url):
    """Scrape a paginated Interactive Brokers product listing.

    Fetches ``ib_exchange_url + page`` for page = 1, 2, ... until a page
    yields no symbol rows (or a request/parse fails), and returns a dict
    mapping each symbol to a tuple of the remaining record fields
    (company name, symbol, currency per parse_ib_symbol_record).

    Args:
        ib_exchange_url: Base listing URL; the page number is appended.

    Returns:
        dict mapping symbol -> tuple of the remaining record fields.
    """
    page = 0
    symbols = {}
    while True:
        page += 1
        time.sleep(1.01)  # throttle: stay just over one request per second
        print(page)
        url = ib_exchange_url + str(page)
        valid_page = False
        try:
            raw_html = simple_get(url)
            soup = BeautifulSoup(raw_html, 'html.parser')
            for tr in soup.find_all('tr'):
                tr_str = str(tr).replace('\n', '')
                # Data rows begin directly with a cell; header rows do not.
                if tr_str.startswith('<tr><td>'):
                    symbol_data = parse_ib_symbol_record(tr_str)
                    # tuple(...) directly — the listcomp copy was redundant.
                    symbols[symbol_data[0]] = tuple(symbol_data[1:])
                    valid_page = True
        except Exception:
            # Best-effort: any fetch/parse failure means "no more pages".
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            break
        if not valid_page:
            break
    return symbols
def pol_scrape(url):
    """Return the article body text of a Politico page as one string."""
    raw = web_scraper.simple_get(url)
    parsed = bs4.BeautifulSoup(raw, 'html.parser')
    # Paragraphs sit under the story-text container; note the trailing
    # space inside the class attribute value — it is part of the markup.
    paragraphs = parsed.select('div[class="story-text "] > p')
    return ' '.join(p.text for p in paragraphs)
def wpt_scrape(url):
    """Return all paragraph text of a Washington Post page as one string."""
    raw = web_scraper.simple_get(url)
    parsed = bs4.BeautifulSoup(raw, 'html.parser')
    # Every <p> on the page is included — the site has no stable
    # body-specific class, so we take them all.
    return ' '.join(p.text for p in parsed.select('p'))
def abc_scrape(url):
    """Return the article body text of an ABC News page as one string."""
    raw = web_scraper.simple_get(url)
    parsed = bs4.BeautifulSoup(raw, 'html.parser')
    # ABC marks body paragraphs with the articleBody microdata attribute.
    paragraphs = parsed.select('p[itemprop="articleBody"]')
    return ' '.join(p.text for p in paragraphs)
def nyt_scrape(url):
    """Return the article body text of a New York Times page as one string.

    Collects <p> tags carrying the 'css-1i0edl6' class (the class NYT was
    using for body paragraphs when this was written — brittle; verify it
    still matches current markup).
    """
    html = web_scraper.simple_get(url)
    # Parser made explicit: matches the other scrapers in this file and
    # avoids bs4's "no parser specified" warning / platform-dependent default.
    soup = bs4.BeautifulSoup(html, 'html.parser')
    text = []
    for peas in soup.find_all('p'):
        # Tags without a class attribute previously raised KeyError into a
        # bare except; Tag.get with a default skips them without masking
        # unrelated bugs.
        if 'css-1i0edl6' in peas.get('class', []):
            text.append(peas.text)
    return ' '.join(text)
def cnn_scrape(url):
    """Return the description plus article body text of a CNN page.

    Concatenates the content of <meta> tags whose itemprop contains
    'description' followed by the text of <div> elements carrying the
    'zn-body__paragraph' class, joined with spaces.
    """
    html = web_scraper.simple_get(url)
    # Parser made explicit: matches the other scrapers in this file and
    # avoids bs4's "no parser specified" warning / platform-dependent default.
    soup = bs4.BeautifulSoup(html, 'html.parser')
    text = []
    for meta in soup.find_all('meta'):
        # Tag.get with defaults replaces the old bare except: tags missing
        # itemprop or content are skipped, as before, without hiding bugs.
        content = meta.get('content')
        if content is not None and 'description' in meta.get('itemprop', ''):
            text.append(content)
    for div in soup.find_all('div'):
        if 'zn-body__paragraph' in div.get('class', []):
            text.append(div.text)
    return ' '.join(text)