# Shared imports for the snippets below. Helpers such as app_logger,
# get_driver, get_event_info, get_odds_change, get_analize_urls,
# find_stat_table, make_event_data, write_text_file and write_csv are
# project-level functions that the original examples rely on.
import time
from multiprocessing import Pool

from bs4 import BeautifulSoup
from tqdm import tqdm

def get_odds_info(html, odds_info_url):
    app_logger.info('Start odds parse')
    soup = BeautifulSoup(html, 'lxml')
    event_info = get_event_info(html)
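    # each bookmaker's odds row is keyed by a fixed oddstr_<id> element id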
    pinnacle_row = soup.select('tr#oddstr_177')
    sbobet_row = soup.select('tr#oddstr_474')
    betfair_row = soup.select('tr#oddstr_2')
    xbet_row = soup.select('tr#oddstr_1047')
    marathon_row = soup.select('tr#oddstr_816')
    odds_rows = [pinnacle_row, sbobet_row,
                 betfair_row, xbet_row,
                 marathon_row]
    bookms = ['pinnacle', 'sbobet', 'betfair', '1xbet', 'marathon']
    odds_data = []
    for i, odds_row in enumerate(odds_rows):
        try:
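            # the third cell's onclick handler embeds the odds-history url between single quotes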
            odds_url = odds_row[0].select('td')[2]['onclick'].split("'")[1]
            odds_change_info = get_odds_change(get_html(odds_url), odds_info_url)
            odds_data.append({**event_info, **odds_change_info})
        except Exception:
            app_logger.info(f'Odds row for {bookms[i]} not found on {odds_info_url}')
    if not odds_data:
        app_logger.debug(f'odds data not found on {odds_info_url}')
        write_text_file(odds_info_url, 'nowg_parser/logs/failed_odds_stats.txt')
    return odds_data
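# The write_text_file helper used above is not shown in these snippets.
# A minimal sketch, assuming it simply appends one line per call (the
# signature and behavior are guesses from the call sites):
import os

def write_text_file(line, filepath):
    # hypothetical helper: append one line, creating parent dirs if needed
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, 'a', encoding='utf-8') as f:
        f.write(f'{line}\n')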
Example #2
def run_multi_parse(urls, n_proc):
    app_logger.info(
        f'Start multiprocess function urls - {len(urls)} num processes - {n_proc}'
    )
    pool = Pool(n_proc)
    pool.map(run_parse, urls)
    pool.close()
    pool.join()
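# A usage sketch for run_multi_parse; the urls below are placeholders. The
# __main__ guard matters because multiprocessing re-imports the module in
# child processes on spawn-based platforms (Windows, macOS):
if __name__ == '__main__':
    urls = ['https://example.com/odds/1.html', 'https://example.com/odds/2.html']
    run_multi_parse(urls, n_proc=4)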
Example #3
def run_parse(url, page=None):
    app_logger.info(f'Start parsing urls on {url}')
    filepath = 'nowg_parser/urls/events_urls.txt'
    try:
        events_urls = get_analize_urls(url, page)
        for event_url in events_urls:
            write_text_file(event_url, filepath)
    except Exception:
        app_logger.exception(f'Fail parser on url {url}')
        write_text_file(url, 'nowg_parser/urls/failed_parsing_urls3.txt')
def get_html(url):
    app_logger.info(f'Start receiving html on {url}')
    html = None  # keep html defined so the return below cannot raise NameError
    try:
        with get_driver() as driver:
            driver.get(url)
            time.sleep(1)
            html = driver.page_source
    except Exception:
        app_logger.exception(f'Error receiving html on {url}')
        write_text_file(url, 'nowg_parser/logs/failed_stat_url.txt')
    return html
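# get_driver is assumed to be a project helper that yields a Selenium
# WebDriver as a context manager. A minimal sketch using headless Chrome
# (the options are an assumption, not the project's actual configuration):
from contextlib import contextmanager

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

@contextmanager
def get_driver():
    # hypothetical helper: headless Chrome, always quit on exit
    options = Options()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        yield driver
    finally:
        driver.quit()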
def run_parse(event_url):
    app_logger.info(f'Start parsing odds on {event_url}')
    odds_file = 'nowg_parser/data/odds_stats.csv'
    url, event_id = event_url  # unpack before the try so the except block can use url
    try:
        odds_url = url.replace('analysis', '1x2').replace('html', 'htm')
        odds_info = get_odds_info(get_html(odds_url), odds_url)
        for odds_stat in odds_info:
            write_csv(odds_file, odds_stat, odds_stat.keys())
    except Exception:
        app_logger.exception(f'Fail parser on url {url}')
        write_text_file(url, 'nowg_parser/logs/failed_parsing_stats.txt')
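# write_csv is another unshown helper; judging from the call site it appends
# one dict row with the given field names. A minimal sketch under that
# assumption:
import csv
import os

def write_csv(filepath, row, fieldnames):
    # hypothetical helper: append one row, writing a header on first creation
    new_file = not os.path.exists(filepath)
    with open(filepath, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=list(fieldnames))
        if new_file:
            writer.writeheader()
        writer.writerow(row)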
def get_event_stats(stat_html, info_html, event_id):
    app_logger.info('Start stat parse')
    soup = BeautifulSoup(stat_html, 'lxml')
    stat_table = find_stat_table(soup.select('table.bhTable'))
    trs = stat_table.select('tr')
    event_info = get_event_info(info_html)
    data = {}
    for tr in trs[1:]:
        tds = tr.select('td')
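        # cell layout: tds[1] home value, tds[2] stat name, tds[3] away value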
        row_name = tds[2].text.strip()
        home_score = tds[1].text.strip()
        away_score = tds[3].text.strip()
        data[row_name] = [home_score, away_score]
    return {'id': event_id, **event_info, **make_event_data(data)}
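# make_event_data is not shown either. A plausible sketch, assuming it
# flattens the {stat_name: [home, away]} mapping into per-side columns so
# the record can go straight into write_csv:
def make_event_data(data):
    # hypothetical helper: one flat column per side and stat
    event_data = {}
    for stat_name, (home, away) in data.items():
        event_data[f'home_{stat_name}'] = home
        event_data[f'away_{stat_name}'] = away
    return event_data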
def run_multi_parse(urls, n_proc):
    app_logger.info(f'Start multiprocess function urls - {len(urls)} num processes - {n_proc}')
    with Pool(n_proc) as p:
        # list() drains the lazy imap iterator so tqdm can show progress
        list(tqdm(p.imap(run_parse, urls), total=len(urls)))
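# Every snippet above assumes a module-level app_logger. A minimal logging
# configuration sketch (logger name and format are assumptions):
import logging

app_logger = logging.getLogger('nowg_parser')
app_logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
app_logger.addHandler(handler)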