Example #1
import time

from bs4 import BeautifulSoup

# simple_get and parse_ib_symbol_record are project helpers
# (a sketch of simple_get follows this example).


def page_scraper(ib_exchange_url):
    '''
    Takes a paginated listing URL (Interactive Brokers) and returns a
    dictionary mapping each symbol to a tuple
    (company name, symbol, currency).
    '''
    page = 0
    symbols = {}
    while True:
        page += 1
        time.sleep(1.01)  # throttle: stay just over one second per request
        print(page)       # progress indicator
        url = ib_exchange_url + str(page)  # base URL ends just before the page number
        valid_page = False
        try:
            raw_html = simple_get(url)
            soup = BeautifulSoup(raw_html, 'html.parser')
            for tr in soup.find_all('tr'):
                tr_str = str(tr).replace('\n', '')
                # Data rows start with a plain <td>; header rows do not.
                if tr_str.startswith('<tr><td>'):
                    symbol_data = parse_ib_symbol_record(tr_str)
                    symbols[symbol_data[0]] = tuple(symbol_data[1:])
                    valid_page = True
        except Exception:  # request failed or the page no longer parses
            break
        if not valid_page:  # no data rows: we are past the last page
            break
    return symbols
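
All of the examples on this page call a simple_get helper that is not shown. Below is a minimal sketch of what it presumably does, assuming the common requests-based fetch-and-check pattern; the project's actual implementation may differ (for instance, it may return None on failure instead of raising).

import requests


def simple_get(url):
    '''
    Hypothetical sketch of the helper assumed by the examples: fetch a
    URL and return the raw HTML, raising on a bad HTTP status.
    '''
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # surface HTTP errors to the caller
    return response.text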
Example #2
import bs4

import web_scraper  # project helper module providing simple_get()


def pol_scrape(url):
    html = web_scraper.simple_get(url)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    text = []
    # The trailing space in "story-text " is deliberate: the attribute
    # selector matches the exact class value found on the target pages.
    for p in soup.select('div[class="story-text "] > p'):
        text.append(p.text)

    result = ' '.join(text)
    return result
Example #3
import bs4

import web_scraper  # project helper module providing simple_get()


def wpt_scrape(url):
    html = web_scraper.simple_get(url)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    text = []
    # No narrowing selector: every <p> on the page is collected.
    for p in soup.select('p'):
        text.append(p.text)

    result = ' '.join(text)
    return result
Example #4
import bs4

import web_scraper  # project helper module providing simple_get()


def abc_scrape(url):
    html = web_scraper.simple_get(url)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    text = []
    # Only paragraphs flagged as article body via itemprop microdata.
    for p in soup.select('p[itemprop="articleBody"]'):
        text.append(p.text)

    result = ' '.join(text)
    return result
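
Examples #2 through #4 differ only in the CSS selector they pass to soup.select, so they could be collapsed into one parameterized helper. A sketch under that assumption, with the selector values taken verbatim from the examples above:

import bs4

import web_scraper  # project helper module providing simple_get()


def scrape_paragraphs(url, selector):
    '''Fetch a page and join the text of every element matching selector.'''
    html = web_scraper.simple_get(url)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    return ' '.join(p.text for p in soup.select(selector))


# The three site-specific scrapers then reduce to thin wrappers:
# pol_scrape(url) == scrape_paragraphs(url, 'div[class="story-text "] > p')
# wpt_scrape(url) == scrape_paragraphs(url, 'p')
# abc_scrape(url) == scrape_paragraphs(url, 'p[itemprop="articleBody"]')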
Example #5
import bs4

import web_scraper  # project helper module providing simple_get()


def nyt_scrape(url):
    html = web_scraper.simple_get(url)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    text = []
    for p in soup.find_all('p'):
        # Keep only paragraphs carrying the body-text CSS class;
        # get() avoids a KeyError on tags without a class attribute.
        if 'css-1i0edl6' in p.get('class', []):
            text.append(p.text)

    result = ' '.join(text)
    return result
Example #6
import bs4

import web_scraper  # project helper module providing simple_get()


def cnn_scrape(url):
    html = web_scraper.simple_get(url)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    text = []
    # The page summary lives in a <meta itemprop="description"> tag.
    for meta in soup.find_all('meta'):
        if 'description' in meta.get('itemprop', ''):
            text.append(meta.get('content', ''))

    # Body paragraphs are <div>s tagged with the zn-body__paragraph class.
    for div in soup.find_all('div'):
        if 'zn-body__paragraph' in div.get('class', []):
            text.append(div.text)

    result = ' '.join(text)
    return result
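
A possible way to use these site-specific scrapers together is to dispatch on the URL's hostname. The domain-to-function mapping below is an illustrative assumption, not taken from the source project:

from urllib.parse import urlparse

# Hypothetical mapping; adjust to the domains the project actually targets.
SCRAPERS = {
    'www.politico.com': pol_scrape,
    'www.washingtonpost.com': wpt_scrape,
    'abcnews.go.com': abc_scrape,
    'www.nytimes.com': nyt_scrape,
    'www.cnn.com': cnn_scrape,
}


def scrape_article(url):
    '''Pick the site-specific scraper matching the URL's hostname.'''
    host = urlparse(url).netloc
    scraper = SCRAPERS.get(host)
    if scraper is None:
        raise ValueError('no scraper registered for ' + host)
    return scraper(url)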