# Shared imports for the parser functions below; app_logger and the helpers
# get_event_info / get_odds_change / get_analize_urls / get_driver /
# find_stat_table / make_event_data / write_text_file / write_csv are defined
# elsewhere in nowg_parser.
import time
from multiprocessing import Pool

from bs4 import BeautifulSoup
from tqdm import tqdm


def get_odds_info(html, odds_info_url):
    app_logger.info('Start odds parse')
    soup = BeautifulSoup(html, 'lxml')
    event_info = get_event_info(html)
    # Each bookmaker has a fixed row id in the odds table.
    pinnacle_row = soup.select('tr#oddstr_177')
    sbobet_row = soup.select('tr#oddstr_474')
    betfair_row = soup.select('tr#oddstr_2')
    xbet_row = soup.select('tr#oddstr_1047')
    marathon_row = soup.select('tr#oddstr_816')
    odds_rows = [pinnacle_row, sbobet_row, betfair_row, xbet_row, marathon_row]
    bookms = ['pinnacle', 'sbobet', 'betfair', '1xbet', 'marathon']
    odds_data = []
    for i, odds_row in enumerate(odds_rows):
        try:
            # The odds-history URL is embedded in the onclick handler of the third cell.
            odds_url = odds_row[0].select('td')[2]['onclick'].split("'")[1]
            odds_change_info = get_odds_change(get_html(odds_url), odds_info_url)
            odds_data.append({**event_info, **odds_change_info})
        except Exception:
            app_logger.info(f'Odds row for {bookms[i]} not found on {odds_info_url}')
    if not odds_data:
        app_logger.debug(f'Odds data not found on {odds_info_url}')
        write_text_file(odds_info_url, 'nowg_parser/logs/failed_odds_stats.txt')
    return odds_data
def run_multi_parse(urls, n_proc):
    app_logger.info(
        f'Start multiprocess function urls - {len(urls)} num processes - {n_proc}'
    )
    # Fan the URL list out over n_proc worker processes.
    pool = Pool(n_proc)
    pool.map(run_parse, urls)
    pool.close()
    pool.join()
def run_parse(url, page=None):
    app_logger.info(f'Start parsing urls on {url}')
    filepath = 'nowg_parser/urls/events_urls.txt'
    try:
        events_urls = get_analize_urls(url, page)
        # Append every discovered event URL to the output file.
        for event_url in events_urls:
            write_text_file(event_url, filepath)
    except Exception:
        app_logger.exception(f'Parsing failed on url {url}')
        write_text_file(url, 'nowg_parser/urls/failed_parsing_urls3.txt')
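# A possible entry point for the URL-collection step; the listing URLs and the
# process count below are illustrative assumptions, not the project's actual
# configuration.
if __name__ == '__main__':
    listing_urls = [
        'https://example.com/football/results-1.html',
        'https://example.com/football/results-2.html',
    ]
    run_multi_parse(listing_urls, n_proc=4)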
def get_html(url):
    app_logger.info(f'Start receiving html on {url}')
    html = None
    try:
        with get_driver() as driver:
            driver.get(url)
            time.sleep(1)  # give dynamically loaded content a moment to render
            html = driver.page_source
    except Exception:
        app_logger.exception(f'Error receiving html on {url}')
        write_text_file(url, 'nowg_parser/logs/failed_stat_url.txt')
    return html
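# get_html relies on a get_driver() helper that is not shown in this section.
# A minimal sketch, assuming Selenium with a headless Chrome driver; the
# options used here are assumptions, not the project's actual configuration.
from contextlib import contextmanager
from selenium import webdriver

@contextmanager
def get_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=options)
    try:
        yield driver
    finally:
        driver.quit()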
def run_parse(event_url):
    app_logger.info(f'Start parsing urls on {event_url}')
    odds_file = 'nowg_parser/data/odds_stats.csv'
    # event_url is a (url, event_id) pair; unpack before the try block so the
    # except clause can always reference url.
    url, event_id = event_url
    # The 1x2 odds page sits next to the analysis page: swap 'analysis' for
    # '1x2' and the '.html' extension for '.htm'.
    odds_url = url.replace('analysis', '1x2').replace('html', 'htm')
    try:
        odds_info = get_odds_info(get_html(odds_url), odds_url)
        for odds_stat in odds_info:
            write_csv(odds_file, odds_stat, odds_stat.keys())
    except Exception:
        app_logger.exception(f'Parsing failed on url {odds_url}')
        write_text_file(url, 'nowg_parser/logs/failed_parsing_stats.txt')
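# write_text_file and write_csv are project helpers defined elsewhere; a minimal
# sketch of what they are assumed to do (append a line of text; append a CSV
# row, writing the header only when the file is new).
import csv
import os

def write_text_file(line, filepath):
    # Append a single line of text, creating the file if needed.
    with open(filepath, 'a', encoding='utf-8') as f:
        f.write(f'{line}\n')

def write_csv(filepath, row, fieldnames):
    # Append one dict as a CSV row; emit the header on the first write only.
    file_exists = os.path.isfile(filepath)
    with open(filepath, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=list(fieldnames))
        if not file_exists:
            writer.writeheader()
        writer.writerow(row)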
def get_event_stats(stat_html, info_html, event_id):
    app_logger.info('Start stat parse')
    soup = BeautifulSoup(stat_html, 'lxml')
    stat_table = find_stat_table(soup.select('table.bhTable'))
    trs = stat_table.select('tr')
    event_info = get_event_info(info_html)
    data = {}
    # Skip the header row; each remaining row holds the home value, the stat
    # name and the away value in its second, third and fourth cells.
    for tr in trs[1:]:
        tds = tr.select('td')
        row_name = tds[2].text.strip()
        home_score = tds[1].text.strip()
        away_score = tds[3].text.strip()
        data[row_name] = [home_score, away_score]
    return {'id': event_id, **event_info, **make_event_data(data)}
def run_multi_parse(urls, n_proc):
    app_logger.info(
        f'Start multiprocess function urls - {len(urls)} num processes - {n_proc}'
    )
    with Pool(n_proc) as p:
        # imap yields results lazily so tqdm can show per-URL progress.
        list(tqdm(p.imap(run_parse, urls), total=len(urls)))
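# A sketch of how this variant might be driven, assuming the collected event
# URLs have already been paired with their event ids; the input format and
# process count here are assumptions, not the project's actual setup.
if __name__ == '__main__':
    with open('nowg_parser/urls/events_urls.txt', encoding='utf-8') as f:
        event_urls = [tuple(line.strip().split(',')) for line in f if line.strip()]
    run_multi_parse(event_urls, n_proc=8)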