def _crawl(detail_link: str, driver: WebDriver) -> Info:
    """Open one detail page and return the converted record scraped from it.

    Args:
        detail_link: URL of the detail page to load.
        driver: Selenium driver used to load and query the page.

    Returns:
        The result of ``_convert`` applied to the fields collected by
        ``_get_all_present_fields``.
    """
    driver.get(detail_link)
    # Fixed pause after navigation before the DOM is queried
    # (presumably to let the page finish rendering -- confirm).
    time.sleep(5)
    accordeon = driver.find_elements_by_class_name('accordeon')
    try:
        # get rid of cookie banner blocking all clicks on this website
        driver.find_element_by_class_name(
            'moove-gdpr-infobar-allow-all').click()
    except Exception:
        # Best effort: if the banner cannot be found or clicked we carry on.
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt / SystemExit still stop the crawl.
        pass
    info = _get_all_present_fields(accordeon, driver)
    return _convert(info)
# Hand-curated readings for staff figures that are not plain integers.
# Keys are compared after every space has been removed from the page text.
_QUICK_FACTS_STAFF_OVERRIDES = {
    '6fulltime+2interns': 6,
    '2.8': 3,
    '3employeesand7volunteers': 3,
    '5employees': 5,
}

# Hand-curated readings for member figures that are not plain integers.
# Keys are compared after every space and '+' has been removed.
_QUICK_FACTS_MEMBERS_OVERRIDES = {
    '384organisations': 384,
    '50delegations': 50,
    '120,000': 120000,
    'Around35': 35,
    'about5000': 5000,
    '46nationaluniversitysportsgoverningbodies': 46,
    'around1000membersin34branches': 1000,
    '44associations': 44,
    'approx.700': 700,
    '5,000,000': 5000000,
    '250associations': 250,
    '28memberorganisations': 28,
}


def _extract_quick_facts(
    driver: WebDriver, idx: int
) -> Tuple[Optional[int], Optional[int], Optional[int], List[str]]:
    """Read the "quick facts" (founded / staff / members / languages).

    Candidates are all elements with class ``ng-scope`` that contain at
    least one ``<strong>`` tag; each fact is recognised by how the
    element's text starts or ends, and its value is the text of the first
    ``<strong>`` inside the first matching element.

    Args:
        driver: Selenium driver positioned on the page to inspect.
        idx: Identifier of the current item; only used in the "SKIPPING"
            messages printed for values that cannot be interpreted.

    Returns:
        ``(founding_year, staff_number, members_number, languages)``.
        Each numeric entry is ``None`` when the fact is absent or its text
        is neither an integer nor one of the known special cases.
        ``languages`` is ``[]`` when the fact is absent.

    Raises:
        ValueError: If a "Founded" fact is present but is not an integer.
    """
    founding_year = None
    staff_number = None
    members_number = None
    languages: List[str] = []

    candidates = [
        element
        for element in driver.find_elements_by_class_name('ng-scope')
        if element.find_elements_by_tag_name('strong')
    ]
    founded = [e for e in candidates if e.text.startswith('Founded')]
    staff = [e for e in candidates if e.text.strip().endswith('staff')]
    members = [e for e in candidates if e.text.strip().endswith('members')]
    working_languages = [
        e for e in candidates if e.text.startswith('Working languages')
    ]

    if founded:
        founding_year = int(
            founded[0].find_element_by_tag_name('strong').text.strip())

    if staff:
        staff_string = staff[0].find_element_by_tag_name('strong').text.strip()
        staff_string = staff_string.replace(' ', '')
        try:
            staff_number = int(staff_string)
        except ValueError:
            staff_number = _QUICK_FACTS_STAFF_OVERRIDES.get(staff_string)
            if staff_number is None:
                # Only report values that are really dropped, not the ones
                # resolved through the override table.
                print(f'SKIPPING STAFF ({idx}) - {staff_string}')

    if members:
        members_string = members[0].find_element_by_tag_name(
            'strong').text.strip()
        members_string = members_string.replace(' ', '').replace('+', '')
        try:
            members_number = int(members_string)
        except ValueError:
            members_number = _QUICK_FACTS_MEMBERS_OVERRIDES.get(members_string)
            if members_number is None:
                # Mirror the staff branch so dropped values are visible.
                print(f'SKIPPING MEMBERS ({idx}) - {members_string}')

    if working_languages:
        # NOTE(review): this stores the raw text (a str) although the
        # variable and the return type are annotated List[str]; confirm
        # what callers expect before splitting it into a list.
        languages = working_languages[0].find_element_by_tag_name(
            'strong').text.strip()

    return founding_year, staff_number, members_number, languages