示例#1
0
def get_upcoming_fight_stats(
        event_and_fight_links: Dict[str, List[str]]) -> str:
    """Scrape detail/event stats for every upcoming fight.

    Args:
        event_and_fight_links: mapping of event URL -> list of fight URLs.

    Returns:
        One line per fight, formatted ``<fight_details>;<event_info>``,
        joined with newlines. Fights whose pages fail to scrape are skipped.
    """
    rows: List[str] = []

    total = len(event_and_fight_links)
    print('Scraping upcoming fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, (event, fights) in enumerate(event_and_fight_links.items()):
        event_soup = make_soup(event)
        event_info = get_event_info(event_soup)

        for fight in fights:
            try:
                fight_soup = make_soup(fight)
                fight_details = get_upcoming_fight_details(fight_soup)
            except Exception:
                # Best-effort scrape: skip fights whose pages fail to parse.
                continue

            rows.append(fight_details + ';' + event_info)

        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # join() avoids the quadratic cost of repeated string concatenation.
    return '\n'.join(rows)
def get_fighter_name_and_details(
        fighter_name_and_link: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Scrape the stat box (height, weight, reach, stance, DOB) per fighter.

    Args:
        fighter_name_and_link: mapping of fighter name -> profile URL.

    Returns:
        Mapping of fighter name -> list of up to five cleaned stat strings.
    """
    fighter_name_and_details: Dict[str, List[str]] = {}

    total = len(fighter_name_and_link)
    print('Scraping all fighter data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    # Labels stripped from each stat cell; the remaining text is the value.
    labels = ('Height:', 'Weight:', 'Reach:', 'STANCE:', 'DOB:')

    for index, (fighter_name,
                fighter_url) in enumerate(fighter_name_and_link.items()):
        fighter_soup = make_soup(fighter_url)
        stat_items = fighter_soup.findAll('li', {
            'class':
            "b-list__box-list-item b-list__box-list-item_type_block"
        })

        data = []
        # Only the first five list items hold the stats we care about.
        for item in stat_items[:5]:
            text = item.text.replace('  ', '').replace('\n', '')
            for label in labels:
                text = text.replace(label, '')
            data.append(text)

        fighter_name_and_details[fighter_name] = data
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    return fighter_name_and_details
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Collect fighter names and profile links from each group page.

    A fighter's name is split across the two anchors preceding every
    third anchor, which carries the profile URL.
    """
    name_to_link = {}
    pending_name = ''

    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for page_index, group_url in enumerate(fighter_group_urls):
        page_soup = make_soup(group_url)
        body = page_soup.find('tbody')
        anchors = body.findAll('a', {'class': 'b-link b-link_style_black'},
                               href=True)
        for position, anchor in enumerate(anchors, start=1):
            if position % 3 == 0:
                # Third anchor: record the link for the accumulated name.
                name_to_link[pending_name] = anchor['href']
                pending_name = ''
            elif pending_name:
                pending_name = pending_name + ' ' + anchor.text
            else:
                pending_name = anchor.text
        print_progress(page_index + 1, total,
                       prefix='Progress:', suffix='Complete')

    return name_to_link
def get_link_of_upcoming_events_no_pickle(
        upcoming_events_url: str = UPCOMING_EVENTS_URL) -> List[str]:
    """Return the href of every anchor inside the upcoming-events table.

    Unlike the past-events scraper, the result is not cached to disk.
    """
    soup = make_soup(upcoming_events_url)
    # Flattened comprehension replaces the nested append loop; the unused
    # local `url` from the original is dropped.
    return [
        anchor.get('href')
        for cell in soup.findAll('td', {'class': 'b-statistics__table-col'})
        for anchor in cell.findAll('a')
    ]
示例#5
0
def get_all_odds(odds_url: str = UPCOMING_ODDS_URL) -> List[str]:
    """Return the href of every anchor on the betting-odds page.

    Fixes two bugs in the original: it referenced the undefined name
    ``all_events_url`` (NameError at runtime) and ignored the ``odds_url``
    parameter by always scraping the module constant.
    """
    soup = make_soup(odds_url)
    return [
        anchor.get('href')
        for wrapper in soup.findAll('div', {'class': 'op-content-wrapper'})
        for anchor in wrapper.findAll('a')
    ]
def get_event_and_fight_links_no_pickle(
        event_links: List[str]) -> Dict[str, List[str]]:
    """Map each event URL to the fight-detail links found on its page."""
    # Implicit concatenation keeps this identical to the original literal.
    row_class = ('b-fight-details__table-row '
                 'b-fight-details__table-row__hover js-fight-details-click')
    event_and_fight_links = {}
    for event_link in event_links:
        page = make_soup(event_link)
        rows = page.findAll('tr', {'class': row_class})
        event_and_fight_links[event_link] = [
            row.get('data-link') for row in rows
        ]
    return event_and_fight_links
示例#7
0
def get_link_of_past_events(all_events_url: str = ALL_EVENTS_URL) -> List[str]:
    """Scrape the link of every past event and cache the list to disk.

    Returns the hrefs of all anchors in the statistics table and pickles
    them to PAST_EVENT_LINKS_PATH for later diffing against new scrapes.
    """
    soup = make_soup(all_events_url)
    links = [
        anchor.get('href')
        for cell in soup.findAll('td', {'class': 'b-statistics__table-col'})
        for anchor in cell.findAll('a')
    ]

    # Context manager guarantees the file is closed even if dump raises.
    with open(PAST_EVENT_LINKS_PATH.as_posix(), "wb") as pickle_out:
        pickle.dump(links, pickle_out)

    return links
def get_event_and_fight_links(event_links: List[str]) -> Dict[str, List[str]]:
    """Map each event URL to its fight links and cache the mapping to disk.

    The mapping is pickled to EVENT_AND_FIGHT_LINKS_PATH before returning.
    """
    event_and_fight_links = {}
    for link in event_links:
        soup = make_soup(link)
        rows = soup.findAll('tr', {'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
        event_and_fight_links[link] = [row.get('data-link') for row in rows]

    # Context manager guarantees the file is closed even if dump raises.
    with open(EVENT_AND_FIGHT_LINKS_PATH.as_posix(), "wb") as pickle_out:
        pickle.dump(event_and_fight_links, pickle_out)

    return event_and_fight_links
示例#9
0
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Scrape fighter names/links, keeping only fighters not seen before.

    Scrapes every group page, diffs the scraped links against the pickled
    cache at PAST_FIGHTER_LINKS_PATH, rewrites the cache with the fresh
    list, and returns only the fighters whose links are new.
    """
    fighter_name_and_link = {}
    fighter_name = ''

    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, fighter_group_url in enumerate(fighter_group_urls):
        soup = make_soup(fighter_group_url)
        table = soup.find('tbody')
        names = table.findAll('a', {'class': 'b-link b-link_style_black'},
                              href=True)
        # Names span two anchors; every third anchor is the profile link.
        for i, name in enumerate(names):
            if (i + 1) % 3 != 0:
                if fighter_name == '':
                    fighter_name = name.text
                else:
                    fighter_name = fighter_name + ' ' + name.text
            else:
                fighter_name_and_link[fighter_name] = name['href']
                fighter_name = ''
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # Load the previously scraped links so we can diff against them.
    with open(PAST_FIGHTER_LINKS_PATH.as_posix(), "rb") as pickle_in:
        past_fighter_links = pickle.load(pickle_in)

    new_fighter_links = list(fighter_name_and_link.values())

    # Links present in this scrape but absent from the cached one.
    fighter_links = np.setdiff1d(new_fighter_links, past_fighter_links)

    # Replace the cache with the newly scraped link list.
    with open(PAST_FIGHTER_LINKS_PATH.as_posix(), "wb") as pickle_out:
        pickle.dump(new_fighter_links, pickle_out)

    # Keep only fighters whose link is new since the last scrape.
    return {name: link
            for name, link in fighter_name_and_link.items()
            if link in fighter_links}
def get_fight(
        event_and_fight_links: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Scrape fighter name/link details for every fight of every event.

    Returns a dict merging all per-fight dicts; the first value seen for a
    key wins.

    Fixes from the original: the progress update sat outside the loop, so
    it printed only once and raised NameError for empty input; merging via
    ``setdefault(k, []).append(v)`` then taking ``[0]`` crashed with
    AttributeError when a key appeared twice (``.append`` on a non-list).
    The return annotation is corrected from ``str`` to the dict actually
    returned.
    """
    per_fight_dicts = []
    links_dict: Dict[str, List[str]] = {}

    total = len(event_and_fight_links)
    print('Scraping all fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, (event, fights) in enumerate(event_and_fight_links.items()):
        for fight in fights:
            fight_soup = make_soup(fight)
            per_fight_dicts.append(get_fighter_name_and_link(fight_soup))
        # Progress is updated once per event, inside the loop.
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # Merge the per-fight dicts; the first value seen for a key wins.
    for details in per_fight_dicts:
        for key, value in details.items():
            links_dict.setdefault(key, value)

    print(links_dict)

    return links_dict