def detectCountryFromData(self, srcData: str) -> list:
    """Detect names of countries mentioned in event data.

    Event data is free text such as a WHOIS lookup, Geo Info or a
    physical address. Two strategies are combined:

    1. Every known country name is searched for in the text, but only
       counted when it is enclosed by delimiter characters (punctuation,
       quotes, brackets, line breaks).
    2. Lines of the form "Country: <value>" (usually found in WHOIS
       records) are parsed, and <value> is accepted when it is either a
       known country code or a known country name (exact match).

    Args:
        srcData (str): event data

    Returns:
        list: list of unique country names (order is not guaranteed)
    """
    if not srcData:
        return []

    # Dictionary mapping country codes to country names
    abbvCountryCodes = SpiderFootHelpers.countryCodes()
    countryNames = abbvCountryCodes.values()
    countries = set()

    # Characters that must surround a country name for it to count.
    # Spaces are not included since "New Jersey" and others
    # will get interpreted as "Jersey", etc.
    delimiter = r"[,'\"\:\=\[\(\[\n\t\r\.]"

    # Lower-case the data once instead of once per country name
    srcDataLower = srcData.lower()

    for countryName in countryNames:
        # Cheap substring pre-filter before running the regex
        if countryName.lower() not in srcDataLower:
            continue

        # Escape the name so that characters such as "(", ")" or "."
        # are matched literally instead of being read as regex syntax
        pattern = delimiter + r" ?" + re.escape(countryName) + delimiter

        if re.search(pattern, srcData, re.IGNORECASE):
            countries.add(countryName)

    # Look for "Country: ", usually found in Whois records.
    # Capture the rest of the line: a trailing lazy "(.+?)" would only
    # ever capture a single character and never match a code or a name.
    for m in re.findall(r"country:[ \t]*([^\r\n]+)", srcData, re.IGNORECASE):
        m = m.strip()
        if m in abbvCountryCodes:
            countries.add(abbvCountryCodes[m])
        if m in countryNames:
            countries.add(m)

    return list(countries)
def test_countryCodes_should_return_a_dict(self):
    """SpiderFootHelpers.countryCodes() should return a dict."""
    self.assertIsInstance(SpiderFootHelpers.countryCodes(), dict)