def mexico_all_links(driver):
    """Build the per-country advisory link (and visa info) for Mexico.

    The Mexican website only provides its advisory in Spanish, so instead of
    scraping the text we store an HTML link to the country page.

    Args:
        driver: Browser driver exposing ``get(url)`` and ``page_source``
            (presumably a Selenium WebDriver -- confirm against the caller).
            It is also handed to ``wiki_visa_parser``.

    Returns:
        dict: Maps each country iso to a dict with the keys
        ``'advisory_text'`` (HTML anchor), ``'country_iso'`` and ``'name'``.
        Entries whose iso also appears in the parsed visa table additionally
        carry ``'visa-info'``.
    """
    base_url = 'https://guiadelviajero.sre.gob.mx'
    driver.get(base_url + '/')
    soup = BeautifulSoup(driver.page_source, 'lxml')

    # Country pages are the anchors whose href contains this path segment.
    reg = regex.compile(r'\/103-ficha-de-paises\/')
    anchors = soup.findAll('a', attrs={'href': reg})

    links = {}
    iso_es = get_iso_es()
    LOGGER.info('Retrieving the URLs for all countries for the Mexico advisory')
    for att in anchors:
        # Resolved outside the try so the warning below can always name the
        # country that failed.
        name = att.text.strip()
        try:
            iso = iso_es[name]
            href = base_url + att['href']
            links[iso] = {
                'advisory_text': f"<a href ='{href}'>Mexican Government Webesite</a>",
                'country_iso': iso,
                'name': name
            }
            LOGGER.success(f'The URL for {name} was successfully retrieved')
        except Exception as error_msg:
            LOGGER.warning(
                f"This country's iso was not found for {name} because of the following error: {error_msg}"
            )
    # Reported once, after every anchor has been processed.
    LOGGER.success(
        'Successfully retrieved the URLs for all countries of the Mexican advisory'
    )

    # get the visa for mexico like for other countries from wikipedia
    LOGGER.info(
        'Parsing visa requirements for all countries for the Mexican advisory')
    # Default to an empty table so a parsing failure only skips the visa
    # enrichment instead of raising NameError in the loop below.
    visas = {}
    try:
        wiki_visa_ob_MX = wiki_visa_parser(wiki_visa_url_MX, driver)
        visas = wiki_visa_ob_MX.visa_parser_table()
        visas = replace_key_by_iso(visas)
        LOGGER.success(
            'Successfully parsed all countries for the Mexican advisory')
    except Exception as error_msg:
        LOGGER.error(
            f'Was not successful in parsing visa requirements for Mexican advisory because of the following error: {error_msg}'
        )

    # Attach the visa info to every country for which a link was found.
    for key in visas:
        try:
            links[key]['visa-info'] = visas[key].get('visa-info')
        except Exception as error_msg:
            LOGGER.warning(
                f'The following iso was not found: {key} because of the following error: {error_msg}'
            )
    return links
# Pick the social dataframe; the source is expected to always expose it at
# index 3 of data_tables (i.e. the fourth table).
data_table_social = data_tables[3]
LOGGER.info(f'Parsing returned following dataframe: {data_table_social}')

# Get latest year (the last column label)
latest_year_social = data_table_social.columns[-1]
LOGGER.info(f'Parsing returned following year: {latest_year_social}')

# Get data of latest year (the last column)
social_data = data_table_social.iloc[:,-1]
LOGGER.info(f'Parsing returned following year data: {social_data}')

# For each indicator keep the first value whose row label starts with the
# given prefix, or 'no match' when no row matches.
lifeExpectancy_object = social_data[social_data.index.str.startswith('Life expectancy')]
lifeExpectancy = next(iter(lifeExpectancy_object), 'no match')
LOGGER.success(f'Following life expectancy data was retrieved: {lifeExpectancy}')

infantMortality_object = social_data[social_data.index.str.startswith('Infant mortality')]
infantMortality = next(iter(infantMortality_object), 'no match')
LOGGER.success(f'Following infant mortality data was retrieved: {infantMortality}')

nbOfPhysicians_object = social_data[social_data.index.str.startswith('Health: Physicians')]
nbOfPhysicians = next(iter(nbOfPhysicians_object), 'no match')
LOGGER.success(f'Following physicians data was retrieved: {nbOfPhysicians}')

homicideRate_object = social_data[social_data.index.str.startswith('Intentional homicide rate')]
homicideRate = next(iter(homicideRate_object), 'no match')
LOGGER.success(f'Following homicide rate data was retrieved: {homicideRate}')

# Pick the next dataframe, expected at index 4 of data_tables (the fifth table)
data_table_env = data_tables[4]
# Create table if it does not exist
DB.add_table('emergency', country='text', police='text', ambulance='text', fire='text')

data_tables = pd.read_html('http://chartsbin.com/view/1983')
data_table = data_tables[0]
latest_year = data_table.columns[1]

# NOTE(review): rows are matched to countries purely by position, i.e. row i
# of the scraped table is assumed to belong to iso_list_2[i] -- confirm.
for row_position, country in enumerate(iso_list_2):
    try:
        LOGGER.info(f'Getting emergency contacts data for {country}')
        # Columns 1, 2 and 3 hold the police, ambulance and fire numbers;
        # missing cells (rendered as 'nan') are stored as empty strings.
        police, ambulance, fire = (
            '' if str(cell) == 'nan' else cell
            for cell in (data_table.iloc[row_position, column]
                         for column in (1, 2, 3))
        )
        LOGGER.success(f'Following emergency contacts data was retrieved: {country}: {police} {ambulance} {fire}')
        LOGGER.info('Inserting data into database.')
        DB.insert_or_update('emergency', country, police, ambulance, fire)
        LOGGER.success('Data successfully inserted in db')
    except Exception as error_msg:
        # Best effort: log the failure and continue with the next country.
        LOGGER.error(f'Could not get emergency contacts data for {country} because of the following error: {error_msg}')
result['operator'] = {'value': ''} if 'phone' not in result: result['phone'] = {'value': ''} if 'email' not in result: result['email'] = {'value': ''} if 'website' not in result: result['website'] = {'value': ''} LOGGER.info(f'Getting embassy data for {result["country"]["value"]}') DB.insert_or_update('embassies', result['country']['value'], result['city']['value'], result['operator']['value'], result['type']['value'], result['phone']['value'], result['email']['value'], result['website']['value']) LOGGER.success( f'Successfully entered embassy information for {result["country"]["value"]}' ) except Exception as error_msg: LOGGER.error( f'Could not get embassy data for {result["country"]["value"]} because of the following error: {error_msg}' ) pass for result in consulates_results["results"]["bindings"]: try: if 'city' not in result: result['city'] = {'value': ''} if 'operator' not in result: result['operator'] = {'value': ''} if 'phone' not in result: result['phone'] = {'value': ''}
# Parse currencies and add to database
for country in iso_list:
    try:
        LOGGER.info(f'Beginning currency parsing for country: {country}')
        url_converter = UrlHelper(currency_api_link)
        information_link = url_converter.get_currency_api(country)
        LOGGER.info(
            f'Retrieving information from following link: {information_link}')

        # Query exactly the link that was logged instead of building it twice.
        currency_api = ApiHelper(information_link)
        LOGGER.info(f'Parsing returned HTML code: {currency_api.get_code()}')

        # Only the first currency listed for the country is stored.
        data = currency_api.get_json()['currencies'][0]
        LOGGER.success(f'Following currency data was retrieved: {data}')

        # Missing fields are stored as the literal string "None".
        data = {
            field: "None" if value is None else value
            for field, value in data.items()
        }

        LOGGER.info('Inserting data into database.')
        DB.insert_or_update('currencies', country, data['name'],
                            data['code'], data['symbol'])
    except Exception as error_msg:
        # Best effort: log the failure and continue with the next country.
        LOGGER.error(
            f'Could not get currency data for {country} because of the following error: {error_msg}'
        )