from bs4 import BeautifulSoup


def get_half_stat(url, half, command_id=None):
    html = get_page_source(url)
    soup = BeautifulSoup(html, 'lxml')
    app_logger.info(f'Start parsing HALF {half} stat for {url}\n')
    half_table = ('div#tab-statistics-1-statistic'
                  if half == '1st_half' else
                  'div#tab-statistics-2-statistic')
    stat_rows = soup.select(f'{half_table} div.statRow')
    half_stats = {}
    for stat_row in stat_rows:
        title_value = normalize_value(stat_row.select(
            'div.statText.statText--titleValue')[0].text)
        home_value = normalize_value(stat_row.select(
            'div.statText.statText--homeValue')[0].text)
        away_value = normalize_value(stat_row.select(
            'div.statText.statText--awayValue')[0].text)
        if command_id is not None:
            # command_id is 0 for the home side, 1 for the away side;
            # command_id - 1 wraps around to index the opponent's value.
            half_stats[f'{half}_{title_value}_OWN'] = int(
                [home_value, away_value][command_id])
            half_stats[f'{half}_{title_value}_ENEMY'] = int(
                [home_value, away_value][command_id - 1])
        else:
            half_stats[f'{half}_{title_value}_home'] = int(home_value)
            half_stats[f'{half}_{title_value}_away'] = int(away_value)
    app_logger.debug(f'Received HALF stat:\n{half_stats}\n')
    return half_stats
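# get_page_source and normalize_value are not defined in this listing. Below
# is a minimal sketch of what they might look like, assuming Selenium renders
# the JavaScript-driven flashscore.com stats tabs; the names match the calls
# above, but the bodies are guesses, not the original implementation.
from selenium import webdriver


def get_page_source(url):
    # Render the page in a real browser so the dynamically loaded stats appear.
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        return driver.page_source
    finally:
        driver.quit()


def normalize_value(value):
    # Trim whitespace, drop percent signs, and replace spaces with
    # underscores, so stat titles work as dict keys and numeric values
    # like '56%' survive int().
    return value.strip().replace('%', '').replace(' ', '_')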
def get_past_stat(url):
    main_stat = get_main_stat(url)
    html = get_page_source(url)
    soup = BeautifulSoup(html, 'lxml')
    app_logger.info(f'Start collecting PAST stat data from {url}\n')
    template = 'https://www.flashscore.com{}/results/'
    home_command_url = template.format(
        soup.select('div.team-text.tname-home a.participant-imglink')
        [0].attrs['onclick'].split("'")[1])
    away_command_url = template.format(
        soup.select('div.team-text.tname-away a.participant-imglink')
        [0].attrs['onclick'].split("'")[1])
    home_prev_events = find_previous_events(home_command_url,
                                            main_stat['date'])
    away_prev_events = find_previous_events(away_command_url,
                                            main_stat['date'])
    past_stat = {}
    home_past_stat = add_type_command(
        get_summary_stat(home_prev_events, main_stat['home_command'],
                         main_stat['championate'], 'home'), 'HOME')
    away_past_stat = add_type_command(
        get_summary_stat(away_prev_events, main_stat['away_command'],
                         main_stat['championate'], 'away'), 'AWAY')
    past_stat.update(home_past_stat)
    past_stat.update(away_past_stat)
    app_logger.debug('Formed PAST STAT')
    return past_stat
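# add_type_command is referenced above but not shown. A plausible minimal
# sketch, assuming it only tags every key with the side the stats belong to
# (the behaviour is inferred from the call sites, not confirmed):
def add_type_command(stats, command_type):
    # Prefix each key with 'HOME'/'AWAY' so both teams' past stats fit in
    # one flat dict without key collisions.
    return {f'{command_type}_{key}': value for key, value in stats.items()}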
def get_live_stat(url):
    live_stat = {}
    live_stat.update(get_main_stat(url))
    first_half_url = url + '#match-statistics;1'
    second_half_url = url + '#match-statistics;2'
    live_stat.update(get_half_stat(first_half_url, '1st_half'))
    live_stat.update(get_half_stat(second_half_url, '2nd_half'))
    app_logger.debug(f'Formed data dict with live stat:\n {live_stat}\n')
    return live_stat
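# Example usage (the match URL and the exact key names are illustrative;
# keys depend on the stat titles flashscore renders):
#   stat = get_live_stat('https://www.flashscore.com/match/AbCdEfGh/')
#   stat.get('1st_half_Goal_Attempts_home')  # e.g. 7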
Example #4
import requests
from fake_useragent import UserAgent


def get_html(url):
    user_agent = UserAgent().chrome
    r = requests.get(url, headers={'User-Agent': user_agent})
    if r.ok:
        app_logger.debug(f'Received html page {url} code = {r.status_code}')
        return r.text
    # requests raised no exception here, so log an error (not a stack trace)
    # and make the failure explicit to callers.
    app_logger.error(f'Error getting html page {url} {r.status_code}')
    return None
def get_summary_stat(stat_rows,
                     command,
                     championate,
                     position,
                     select_type='position'):
    app_logger.info(f'Start receiving SUMMARY stats for {command}\n')
    stat_rows = (find_position_events(stat_rows, command, position)
                 if select_type == 'position' else stat_rows)
    stat_rows = rows_filter(stat_rows, championate)
    app_logger.info(f'LEFT AFTER FILTER {len(stat_rows)} rows')
    summary_stats = []
    for stat_row in stat_rows:
        event_stat = {}
        try:
            home_command = stat_row.select(
                'div.event__participant--home')[0].text.strip()
            away_command = stat_row.select(
                'div.event__participant--away')[0].text.strip()
            event_scores = stat_row.select('div.event__scores span')
            first_half_scores = stat_row.select(
                'div.event__part')[0].text.strip('(').strip(')').split('-')
            # 0 if the team of interest played at home, 1 if away;
            # command_id - 1 wraps around to the opponent's score.
            command_id = 0 if command in home_command else 1
            event_stat['goals_scored'] = event_scores[command_id].text
            event_stat['goals_missed'] = event_scores[command_id - 1].text
            event_stat['1half_goals_scored'] = first_half_scores[command_id]
            event_stat['1half_goals_missed'] = first_half_scores[command_id - 1]
            event_id = stat_row['id'][4:]
            first_half_url = f'https://www.flashscore.com/match/{event_id}/#match-statistics;1'
            second_half_url = f'https://www.flashscore.com/match/{event_id}/#match-statistics;2'
            app_logger.info(
                f'DETAIL STAT {home_command} {event_scores} {away_command}')
            event_stat.update(
                get_half_stat(first_half_url, '1st_half', command_id))
            event_stat.update(
                get_half_stat(second_half_url, '2nd_half', command_id))
            summary_stats.append(event_stat)
        except Exception:
            app_logger.exception(
                f'\nError receiving data from stat row {command}')
        app_logger.debug(f'Formed event stats: \n{event_stat}\n')
    return calculate_stat(summary_stats)
def calculate_stat(stats):
    app_logger.info('Start CALCULATING collected past stats')
    slices = [3, 5, 10, 15, 20]
    sums = {}
    slice_stats = {}
    for i, stat in enumerate(stats):
        for key, value in stat.items():
            sums[key] = sums.get(key, 0) + int(value)
        if i + 1 in slices:
            # Snapshot the running sums as the 'last N matches' slice.
            slice_stats.update({f'{i + 1}_last_{key}': value
                                for key, value in sums.items()})
    app_logger.debug(f'Formed slice stats: \n{slice_stats}\n')
    return slice_stats
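# Worked example: slices snapshot the running sums, so the '3_last_*' keys
# hold totals over the three most recent events (values are illustrative):
#   stats = [{'goals_scored': '2'}, {'goals_scored': '0'},
#            {'goals_scored': '1'}]
#   calculate_stat(stats)  # -> {'3_last_goals_scored': 3}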
Example #7
import time

from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys
from tqdm import tqdm


def insert_stat(html):
    summary_stats = {}
    try:
        soup = BeautifulSoup(html, 'lxml')
        HDA_odds, AH_odds, OU_odds = soup.select('div#oddsDetai div')[0:3]
        HDA_stat = get_data_from_table(HDA_odds.select('tr'), '1x2')
        AH_stat = get_data_from_table(AH_odds.select('tr'))
        OU_stat = get_data_from_table(OU_odds.select('tr'))
        app_logger.debug('Received HDA, AH, OU statistics by minutes')
        summary_stats.update(select_pre_match_line(HDA_stat, '1x2'))
        summary_stats.update(select_pre_match_line(AH_stat, 'AH'))
        summary_stats.update(select_pre_match_line(OU_stat, 'OU'))
        app_logger.debug('Added prematch line move')
        for stat in select_stat_in_HT(HDA_stat, '1x2'):
            summary_stats.update(stat)
        for stat in select_stat_in_HT(AH_stat, 'AH'):
            summary_stats.update(stat)
        for stat in select_stat_in_HT(OU_stat, 'OU'):
            summary_stats.update(stat)
        summary_stats.update(get_match_info(soup))
        app_logger.info(
            f'Formed objects with stats cnt keys={len(summary_stats)}')
    except Exception:
        app_logger.exception('\nError receiving stats from page elements')
        return
    insert_into_ng_odds(summary_stats)
    app_logger.debug('Recorded values in table\n')
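# insert_into_ng_odds is not shown in this listing. A minimal sketch,
# assuming a SQLite table named ng_odds whose columns match the stat keys
# (the storage backend and schema are assumptions):
import sqlite3


def insert_into_ng_odds(stats, db_path='stats.db'):
    with sqlite3.connect(db_path) as conn:
        columns = ', '.join(f'"{key}"' for key in stats)
        placeholders = ', '.join('?' for _ in stats)
        conn.execute(
            f'INSERT INTO ng_odds ({columns}) VALUES ({placeholders})',
            list(stats.values()))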
def make_file_champ_urls(country_urls, amount_seasons=4):
    for url in tqdm(country_urls):
        archive_url = url + 'archive/'
        driver = get_driver()
        driver.get(archive_url)
        time.sleep(1)
        champs_by_years = driver.find_elements_by_css_selector(
            'div.leagueTable__season div.leagueTable__seasonName')
        for i, champ in enumerate(champs_by_years[:amount_seasons + 1]):
            champ_text = champ.find_element_by_css_selector('a').text
            season = champ_text.split(' ')[1]
            country = driver.find_element_by_css_selector(
                'h2.tournament').text.split('\n')[1]
            try:
                champ_url = champ.find_element_by_css_selector(
                    'a').get_attribute('href')
                app_logger.debug(
                    f'received url - {champ_url} by {country} {season}')
                write_url_in_file(champ_url)
            except Exception:
                app_logger.exception(
                    '\nError getting element or writing it to file')
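# write_url_in_file is not defined in this listing. A minimal sketch,
# assuming it appends each URL to a plain-text file (the file name is an
# assumption):
def write_url_in_file(url, file_name='champ_urls.txt'):
    # One URL per line; append mode so repeated runs accumulate results.
    with open(file_name, 'a', encoding='utf-8') as f:
        f.write(url + '\n')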
def find_previous_events(url, event_date):
    html = get_more_events(url)
    soup = BeautifulSoup(html, 'lxml')
    app_logger.info(f'Start finding PREV events for {event_date}\n')
    stat_rows = soup.select('div.event__match')
    last_date = stat_rows[-1].select('div.event__time')[0].text.split(' ')[0]
    app_logger.info(f'LAST date in received stat rows = {last_date}\n')
    for stat_row in stat_rows:
        date = stat_row.select('div.event__time')[0].text.split(' ')
        date_parts = date[0].split('.')
        # When the cell also holds a kick-off time, the date part looks like
        # 'DD.MM.', so compare on day + month only; otherwise compare the
        # full dotted date string.
        date_format = (date_parts[0] + date_parts[1]
                       if len(date) > 1 else '.'.join(date_parts))
        event_date_parts = event_date.split('.')
        event_date_format = (event_date_parts[0] + event_date_parts[1]
                             if len(date) > 1 else event_date)
        if date_format == event_date_format:
            prev_events = stat_row.find_next_siblings(
                name='div', attrs={'class': 'event__match'})
            app_logger.debug(
                f'Found {len(prev_events)} previous events earlier than '
                f'{event_date}\n')
            return prev_events
    return []
def get_events_urls(championate_url):
    driver = get_driver()
    driver.get(championate_url)
    app_logger.debug(f'Open page - {championate_url}')
    time.sleep(1)
    more_event = driver.find_element_by_css_selector('a.event__more')
    more_event.send_keys(Keys.END)
    try:
        for i in range(1, 11):
            app_logger.debug(f'get events page #{i}')
            time.sleep(1)
            more_event.click()
            if i > 8:
                app_logger.debug('too many pages open')
    except Exception:
        app_logger.debug('All events open\n')
    time.sleep(1)
    events_lines = driver.find_elements_by_css_selector(
        'div.sportName div.event__match')
    events_id = [event.get_attribute('id') for event in events_lines]
    return make_url_event(events_id)
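# get_driver and make_url_event are referenced above but not defined here.
# Minimal sketches, assuming a headless Chrome driver and event ids with a
# 4-character prefix as in get_summary_stat above (both are assumptions):
from selenium import webdriver


def get_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    return webdriver.Chrome(options=options)


def make_url_event(events_id):
    # Drop the 'g_1_'-style prefix and build canonical match URLs.
    return [f'https://www.flashscore.com/match/{event_id[4:]}/'
            for event_id in events_id]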
Example #11
def main(urls):
    for url in tqdm(urls):
        try:
            insert_stat(get_html(url))
        except Exception:
            app_logger.exception('\nError recording values in database')
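# Example entry point, assuming the odds-page URLs were saved one per line
# beforehand (the file name is hypothetical):
if __name__ == '__main__':
    with open('event_urls.txt', encoding='utf-8') as f:
        main([line.strip() for line in f if line.strip()])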