def parse_fighter_page(ref):
    """Collect a fighter's profile and fight list starting from a Sherdog ref.

    The Sherdog page for ``ref`` is downloaded first. The fighter name read
    from it is used to build a Wikipedia URL and a UFC URL, which are
    downloaded as well. The three pages are wrapped in a ``FighterData``,
    from which each profile field is extracted.

    Args:
        ref: Fighter reference; handed to ``_sherdog_ref`` to locate the
            Sherdog page and stored on the resulting ``Fighter``.

    Returns:
        A ``(fighter, fight_infos)`` tuple: the populated ``Fighter`` and
        whatever ``_parse_fight_infos`` returns for the same Sherdog page.
    """
    # parse main info from Sherdog
    logger.info("Parsing Sherdog info for {}".format(ref))
    sherdog_page = _download_page(_sherdog_ref(ref))
    # NOTE(review): assumes the page always contains a <span class="fn">;
    # confirm what `find` yields when it is missing.
    name_tag = sherdog_page.body.find('span', attrs={'class': 'fn'})
    fighter_name = name_tag.text

    # Only the URLs use the trimmed name; the Fighter keeps the raw text.
    trimmed_name = fighter_name.strip()
    wiki_url = (
        "https://en.wikipedia.org/wiki/" + trimmed_name.replace(" ", "_")
    )
    ufc_url = "http://www.ufc.com/fighter/" + trimmed_name.replace(" ", "-")

    # Wikipedia is fetched before UFC, matching the argument order below.
    wiki_page = _download_page(wiki_url)
    ufc_page = _download_page(ufc_url)
    fighter_data = FighterData(sherdog_page, wiki_page, ufc_page)

    # extract each field
    fighter = Fighter(ref=ref, name=fighter_name)
    extract = fighter_data.extract
    fighter.country = extract(country_extractor)
    fighter.city = extract(city_extractor)
    fighter.birthday = extract(
        sherdog_birthday_extractor,
        wiki_birthday_extractor,
    )
    fighter.height = extract(height_extractor)
    fighter.weight = extract(weight_extractor)
    fighter.reach = extract(wiki_reach_extractor, ufc_reach_extractor)
    fighter.specialization = extract(spec_extractor)

    fight_infos = _parse_fight_infos(ref, sherdog_page)
    return fighter, fight_infos
def parse_fighter_page(ref):
    """Build a ``Fighter`` and its fight infos from Sherdog, Wikipedia and UFC.

    Downloads the Sherdog page identified by ``ref``, reads the fighter name
    from it, derives the Wikipedia and UFC URLs from that name, downloads
    both, and extracts every profile field through ``FighterData.extract``.

    Args:
        ref: Fighter reference; converted with ``_sherdog_ref`` for the
            download and stored unchanged on the returned ``Fighter``.

    Returns:
        ``(fighter, fight_infos)`` where ``fight_infos`` is the result of
        ``_parse_fight_infos(ref, sherdog_page)``.
    """
    # parse main info from Sherdog
    logger.info("Parsing Sherdog info for {}".format(ref))
    sherdog_page = _download_page(_sherdog_ref(ref))
    fighter_name = sherdog_page.body.find('span', attrs={'class': 'fn'}).text

    # Spaces in the trimmed name become "-" for UFC and "_" for Wikipedia.
    url_name = fighter_name.strip()
    ufc_url = "http://www.ufc.com/fighter/" + url_name.replace(" ", "-")
    wiki_url = "https://en.wikipedia.org/wiki/" + url_name.replace(" ", "_")

    # Arguments are evaluated left to right, so Wikipedia is downloaded
    # before UFC.
    fighter_data = FighterData(
        sherdog_page,
        _download_page(wiki_url),
        _download_page(ufc_url),
    )

    # extract each field
    fighter = Fighter(ref=ref, name=fighter_name)
    # (attribute, extractors) pairs, applied in order. How multiple
    # extractors are combined is decided by FighterData.extract.
    field_plan = (
        ("country", (country_extractor,)),
        ("city", (city_extractor,)),
        ("birthday", (sherdog_birthday_extractor, wiki_birthday_extractor)),
        ("height", (height_extractor,)),
        ("weight", (weight_extractor,)),
        ("reach", (wiki_reach_extractor, ufc_reach_extractor)),
        ("specialization", (spec_extractor,)),
    )
    for attribute, extractors in field_plan:
        setattr(fighter, attribute, fighter_data.extract(*extractors))

    return fighter, _parse_fight_infos(ref, sherdog_page)