def get_fighter_name_and_details(
        fighter_name_and_link: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Collect the leading detail fields from every fighter's page.

    For each ``name -> url`` entry, the page is fetched with ``make_soup``
    and the text of at most the first five matching ``<li>`` items is kept,
    stripped of double spaces, newlines and the field labels.

    Args:
        fighter_name_and_link: Mapping of fighter name to the URL that is
            handed to ``make_soup``.

    Returns:
        Mapping of fighter name to the list of cleaned field strings
        (up to five per fighter).
    """
    # Substrings removed from each item's text, applied in this exact order.
    noise_tokens = ('  ', '\n', 'Height:', 'Weight:', 'Reach:', 'STANCE:',
                    'DOB:')
    item_class = "b-list__box-list-item b-list__box-list-item_type_block"

    total = len(fighter_name_and_link)
    print('Scraping all fighter data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    details_by_name = {}
    for done, (fighter_name, fighter_url) in enumerate(
            fighter_name_and_link.items(), start=1):
        page = make_soup(fighter_url)
        items = page.findAll('li', {'class': item_class})

        fields = []
        # Only the first five list items are of interest.
        for item in items[:5]:
            text = item.text
            for token in noise_tokens:
                text = text.replace(token, '')
            fields.append(text)

        details_by_name[fighter_name] = fields
        print_progress(done, total, prefix='Progress:', suffix='Complete')

    return details_by_name
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Build a ``fighter name -> href`` mapping from the listing pages.

    Every page's ``<tbody>`` is scanned for anchors with the class
    ``b-link b-link_style_black``. Anchors are consumed in groups of three:
    the text of the first two is joined with a space to form the name, and
    the ``href`` of the third is stored under that name.

    Args:
        fighter_group_urls: URLs of the listing pages, each passed to
            ``make_soup``.

    Returns:
        Mapping of the assembled fighter name to the ``href`` of the third
        anchor in its group.
    """
    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    links_by_name = {}
    # Not reset between pages: a partially assembled name carries over
    # if a page's anchor count is not a multiple of three.
    pending_name = ''

    for done, group_url in enumerate(fighter_group_urls, start=1):
        body = make_soup(group_url).find('tbody')
        anchors = body.findAll('a', {'class': 'b-link b-link_style_black'},
                               href=True)
        for position, anchor in enumerate(anchors, start=1):
            if position % 3 == 0:
                # Third anchor of the group: record its link, start anew.
                links_by_name[pending_name] = anchor['href']
                pending_name = ''
                continue

            piece = anchor.text
            if pending_name:
                pending_name = pending_name + ' ' + piece
            else:
                pending_name = piece
        print_progress(done, total, prefix='Progress:', suffix='Complete')

    return links_by_name
示例#3
0
def get_upcoming_fight_stats(
        event_and_fight_links: Dict[str, List[str]]) -> str:
    """Scrape every upcoming fight and return the rows as one string.

    For each event, the event page is fetched and summarised once with
    ``get_event_info``. Each of its fights is then fetched and summarised
    with ``get_upcoming_fight_details``. A row is
    ``<fight details>;<event info>``.

    Fights whose page cannot be fetched or parsed are skipped (best-effort
    scraping), and each skip is logged with its traceback instead of being
    dropped silently.

    Args:
        event_and_fight_links: Mapping of event URL to the list of fight
            URLs belonging to that event.

    Returns:
        All rows joined with newlines; an empty string when no fight was
        scraped successfully.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)
    rows = []

    total = len(event_and_fight_links)
    print('Scraping upcoming fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, (event, fights) in enumerate(event_and_fight_links.items()):
        event_soup = make_soup(event)
        event_info = get_event_info(event_soup)

        for fight in fights:
            try:
                fight_soup = make_soup(fight)
                fight_details = get_upcoming_fight_details(fight_soup)
            except Exception:
                # Deliberately broad: one bad fight page must not abort the
                # whole scrape, but the failure should remain visible.
                logger.warning('Skipping fight %s: could not scrape details',
                               fight, exc_info=True)
                continue

            rows.append(fight_details + ';' + event_info)

        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # Join once at the end instead of growing a string inside the loop.
    return '\n'.join(rows)
示例#4
0
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Scrape fighter names and links, returning only links not seen before.

    Every page's ``<tbody>`` is scanned for anchors with the class
    ``b-link b-link_style_black``. Anchors are consumed in groups of three:
    the text of the first two is joined with a space to form the name, and
    the ``href`` of the third is stored under that name.

    The scraped links are then compared with the list pickled at
    ``PAST_FIGHTER_LINKS_PATH``. That file is overwritten with the full,
    freshly scraped link list, and only entries whose link was absent from
    the previous list are returned.

    Args:
        fighter_group_urls: URLs of the listing pages, each passed to
            ``make_soup``.

    Returns:
        Mapping of fighter name to link, restricted to links that were not
        in the previously pickled list.

    Raises:
        OSError: If the pickle file cannot be opened for reading or writing
            (including when it does not exist yet).
    """
    fighter_name_and_link = {}
    # Not reset between pages: a partially assembled name carries over
    # if a page's anchor count is not a multiple of three.
    fighter_name = ''

    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, fighter_group_url in enumerate(fighter_group_urls):
        soup = make_soup(fighter_group_url)
        table = soup.find('tbody')
        names = table.findAll('a', {'class': 'b-link b-link_style_black'},
                              href=True)
        for i, name in enumerate(names):
            if (i + 1) % 3 != 0:
                if fighter_name == '':
                    fighter_name = name.text
                else:
                    fighter_name = fighter_name + ' ' + name.text
            else:
                # Third anchor of the group carries the link we keep.
                fighter_name_and_link[fighter_name] = name['href']
                fighter_name = ''
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    cache_path = PAST_FIGHTER_LINKS_PATH.as_posix()

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. This is only safe while the cache file is written exclusively by
    # this program; do not point it at untrusted data.
    with open(cache_path, "rb") as pickle_in:
        past_fighter_links = pickle.load(pickle_in)

    new_fighter_links = list(fighter_name_and_link.values())

    # Links present now but absent from the previous run. A set gives O(1)
    # membership checks for the filter below.
    unseen_links = set(new_fighter_links).difference(past_fighter_links)

    # Set the stored fighter links to the newly scraped list.
    with open(cache_path, "wb") as pickle_out:
        pickle.dump(new_fighter_links, pickle_out)

    return {
        name: link
        for name, link in fighter_name_and_link.items()
        if link in unseen_links
    }
    def run(self):
        """Gather odds for every unique fighter, de-duplicate, write a CSV.

        ``self.retrive_odds`` is called once per entry of
        ``self.unique_fighters`` and the resulting frames are concatenated.
        A helper column ``duplicates`` is built from both fighter names and
        both average-odds columns, with spaces and dots removed and the
        characters sorted. Rows sharing that key are dropped, keeping the
        first, and the frame (helper column included) is written to
        ``self.Fightersfile`` without the index.
        """
        total = len(self.unique_fighters)
        frames = []

        print_progress(0, total, prefix='Progress:', suffix='Complete')
        for done, fighter in enumerate(self.unique_fighters, start=1):
            frames.append(self.retrive_odds(fighter))
            print_progress(done, total, prefix='Progress:', suffix='Complete')

        combined = pd.concat(frames)

        def _fingerprint(text):
            # Sorting the characters makes the key independent of the order
            # in which the pieces were concatenated - presumably so that the
            # same bout listed from either fighter's side yields one key.
            compact = text.replace(" ", "").replace(".", "")
            return ''.join(sorted(compact))

        raw_key = (combined.Fighter_one + combined.Fighter_two +
                   combined.Average_Odds_f2 + combined.Average_Odds_f1)
        combined['duplicates'] = raw_key.apply(_fingerprint)

        combined = combined.drop_duplicates(subset='duplicates', keep="first")
        combined.to_csv(self.Fightersfile, index=False)
def get_fight(event_and_fight_links: Dict[str, List[str]]) -> Dict:
    """Scrape every fight page and merge the per-fight link dictionaries.

    Each fight URL is fetched with ``make_soup`` and passed to
    ``get_fighter_name_and_link``, which is expected to return a dict.
    The dicts are merged into one; when a key appears in several fights,
    the value from the first fight that produced it is kept.

    Args:
        event_and_fight_links: Mapping of event to the list of fight URLs
            belonging to it. Only the fight URLs are used.

    Returns:
        The merged dictionary (it is also printed before returning).
    """
    links_dict = {}

    total = len(event_and_fight_links)
    print('Scraping all fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, fights in enumerate(event_and_fight_links.values()):
        for fight in fights:
            fight_soup = make_soup(fight)
            fight_details = get_fighter_name_and_link(fight_soup)
            for key, value in fight_details.items():
                # Keep the first value seen for a key. The previous
                # append-then-take-[0] dance called .append on the stored
                # value itself whenever a key appeared a second time.
                links_dict.setdefault(key, value)

        # Advance once per event; reporting inside the loop also avoids an
        # unbound ``index`` when the input mapping is empty.
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    print(links_dict)

    return links_dict