def _get_mapping():
    """Load the zip, state, county, city mapping from file or build it.

    The mapping is read from ``mapping.json`` under
    ``ROOT_DIR/handler/zip_mapping``. If that file does not exist, the mapping
    is produced by calling ``create_mapping()`` instead.

    @return: the mapping object returned by ``json.load`` or ``create_mapping()``
    @raise WalkTheVoteError: if ``create_mapping()`` fails, if any error other
        than a missing file occurs while reading the JSON file, or if the
        loaded mapping is ``None``
    """
    try:
        with open(
            os.path.join(ROOT_DIR, "handler", "zip_mapping", "mapping.json"),
            "r",
        ) as f:
            mapping_dict = json.load(f)
            print(f"{Bcolors.OKBLUE}Mapping data loaded.{Bcolors.ENDC}\n")
    except FileNotFoundError:
        print(
            f"{Bcolors.OKBLUE}Mapping data file not yet existent in directory. "
            f"Attempting to loading mapping from script instead. This could "
            f"""take a while so go grab some coffee. ( ) c[]{Bcolors.ENDC}""")
        try:
            mapping_dict = create_mapping()
        except WalkTheVoteError as e:
            # Re-raise with colour codes, keeping the original as the cause.
            raise WalkTheVoteError(f"{Bcolors.FAIL}{e}{Bcolors.ENDC}") from e
        print(f"{Bcolors.OKBLUE}Load successful.{Bcolors.ENDC}")
    except Exception as e:
        raise WalkTheVoteError(
            f"{Bcolors.FAIL}Unknown error loading mapping file{Bcolors.ENDC}"
        ) from e

    # A JSON file containing ``null`` used to make the retry loop spin forever;
    # fail loudly instead of looping or handing ``None`` to the caller.
    if mapping_dict is None:
        raise WalkTheVoteError(
            f"{Bcolors.FAIL}Mapping file contained no data{Bcolors.ENDC}"
        )
    return mapping_dict
def format_address_data(address_data, county_name):
    """Tag a raw address string and build the address dict for state "SC".

    The address is tagged with ``usaddress.tag`` using
    ``electionsaver.addressSchemaMapping``; only the tags that are present are
    copied into the result.

    @param address_data: raw address string to parse
    @param county_name: county name, used in the error message and in the
        default ``locationName``
    @return: dict that always has ``state`` and ``locationName`` and may have
        ``city``, ``zipCode``, ``streetNumberName``, ``poBox``, ``aptNumber``
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this address
    """
    mapping = electionsaver.addressSchemaMapping

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Nothing has been parsed when tagging fails, so report the raw input.
        raise WalkTheVoteError(
            f"Error with data for {county_name} county, data is {address_data}"
        ) from e

    final_address = {"state": "SC"}

    if "city" in parsed_data_dict:
        final_address["city"] = parsed_data_dict["city"].title()
    if "zipCode" in parsed_data_dict:
        # Zip codes are copied verbatim (no title-casing).
        final_address["zipCode"] = parsed_data_dict["zipCode"]
    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict[
            "streetNumberName"].title()
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()

    # A parsed location name is kept as-is; only the default is title-cased.
    final_address["locationName"] = parsed_data_dict.get(
        "locationName",
        f"{county_name} County Board of Voter Registration & Elections".title(),
    )

    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"].title()

    return final_address
def format_address_data(address_data, town_name):
    """Tag a raw address string and build the address dict for state "NH".

    The address is tagged with ``usaddress.tag`` using
    ``electionsaver.addressSchemaMapping``; only the tags that are present are
    copied into the result.

    @param address_data: raw address string to parse
    @param town_name: town name, used in the error message and in the default
        ``locationName``
    @return: dict that always has ``state`` and ``locationName`` and may have
        ``city``, ``zipCode``, ``streetNumberName``, ``poBox``, ``aptNumber``
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this address
    """
    mapping = electionsaver.addressSchemaMapping

    # Edge case: this exact address string gets a zip code appended.
    if address_data == "20 PARK ST GORHAM":
        address_data = "20 PARK ST GORHAM 03581"

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Nothing has been parsed when tagging fails, so report the raw input.
        raise WalkTheVoteError(
            f"Error with data for {town_name} town, data is {address_data}"
        ) from e

    final_address = {"state": "NH"}

    if "city" in parsed_data_dict:
        final_address["city"] = parsed_data_dict["city"].title()
    if "zipCode" in parsed_data_dict:
        # Zip codes are copied verbatim (no title-casing).
        final_address["zipCode"] = parsed_data_dict["zipCode"]
    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict[
            "streetNumberName"].title()
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()

    # A parsed location name is kept as-is; only the default is title-cased.
    final_address["locationName"] = parsed_data_dict.get(
        "locationName", f"{town_name} City Election Office".title())

    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"].title()

    return final_address
def __init__(self, state):
    """Find an installed web driver executable and start it headless.

    Candidates are tried in list order (chromedriver, then geckodriver). The
    first one whose executable is found by ``shutil.which`` is started with
    the ``--headless`` argument and stored in ``self._primary_driver``.

    @param state: forwarded to ``self._print_error`` to build the error
        message when no driver executable is found
    @raise WalkTheVoteError: if none of the candidate executables is found
    """
    chrome_candidate = _Driver(
        name="chromedriver",
        driver_options=chrome.options.Options(),
        driver=webdriver.Chrome,
        install_info="https://sites.google.com/a/chromium.org/chromedriver"
        "/downloads",
    )
    firefox_candidate = _Driver(
        name="geckodriver",
        driver_options=firefox.options.Options(),
        driver=webdriver.Firefox,
        install_info="https://github.com/mozilla/geckodriver/releases",
    )
    self._drivers = [chrome_candidate, firefox_candidate]

    # Probe candidates in order; stop at the first one that is installed.
    selected = None
    for candidate in self._drivers:
        candidate.driver_path = shutil.which(candidate.name)
        if candidate.driver_path:
            selected = candidate
            break

    if selected is None:
        raise WalkTheVoteError(self._print_error(state=state))

    selected.driver_options.add_argument("--headless")
    self._primary_driver = selected.driver(
        executable_path=selected.driver_path,
        options=selected.driver_options,
    )
def data_to_json_schema():
    """Convert the county elections CSV into a sorted list of office records.

    Reads ``county-elections-departments.csv`` from ``DIRECTORY``, formats each
    row's address with ``format_address_data``, writes the resulting list to
    ``washington.json`` in ``DIRECTORY`` and returns it.

    @return: list of dicts sorted by ``countyName``; each has ``countyName``,
        ``phone``, ``email``, ``website`` and either ``mailingAddress`` (when
        the formatted address has a ``poBox``) or ``physicalAddress``
    @raise WalkTheVoteError: if the prerequisite CSV file is missing
    """
    source_csv = os.path.join(DIRECTORY, "county-elections-departments.csv")
    if not os.path.isfile(source_csv):
        raise WalkTheVoteError(
            f"Prerequisite CSV file needed to scrape Washington. Please go to "
            f"https://www.sos.wa.gov/elections/viewauditors.aspx, click export to "
            f"excel and download the CSV file to {DIRECTORY}. Make sure the downloaded "
            f'file is named "county-elections-departments.csv".')

    info_df = pd.read_csv(source_csv, index_col=False)

    county_list = info_df["County"].values
    website_list = info_df["Web"].values
    address_list = info_df["Address"].values
    # City and Zip are kept as Series and accessed by label below.
    city_list = info_df["City"]
    zip_list = info_df["Zip"]
    email_list = info_df["Email"].values
    phone_list = info_df["Phone"].values

    address_list_formatted = [
        format_address_data(
            address_list[row], county_list[row], zip_list[row], city_list[row])
        for row in range(len(address_list))
    ]

    master_list = []
    for row, county in enumerate(county_list):
        record = {
            "countyName": county.title(),
            "phone": phone_list[row],
            "email": email_list[row],
            "website": website_list[row],
        }
        # A missing website cell stringifies to "nan"; fall back to the
        # state-wide auditors page in that case.
        if str(record["website"]) == "nan":
            record["website"] = "https://www.sos.wa.gov/elections/viewauditors.aspx"

        formatted_address = address_list_formatted[row]
        address_key = (
            "mailingAddress" if "poBox" in formatted_address else "physicalAddress"
        )
        record[address_key] = formatted_address
        master_list.append(record)

    master_list.sort(key=lambda record: record["countyName"])

    with open(os.path.join(DIRECTORY, "washington.json"), "w") as f:
        json.dump(master_list, f)
    return master_list
async def _get_scraper_data(scraper) -> str:
    """Run the scraper's coroutine and store its result on the scraper object.

    Awaits ``scraper.get_election_office()`` and assigns the result to
    ``scraper.election_offices``.

    @param scraper: object exposing an awaitable ``get_election_office()`` and
        a ``state_name`` attribute
    @return: ``scraper.state_name`` when the data was retrieved
    @raise WalkTheVoteError: if ``get_election_office()`` raises; the original
        exception is attached as the cause
    """
    try:
        scraper.election_offices = await scraper.get_election_office()
    except Exception as e:
        # Chain explicitly so the scraper's own traceback is kept as the cause.
        raise WalkTheVoteError(
            f"{Bcolors.WARNING}Problem getting election office data from "
            f"{scraper.state_name}_scraper.py: {e}{Bcolors.ENDC}") from e
    return scraper.state_name
async def get_election_offices():
    """Return the election office data built from the downloaded CSV.

    Requires ``StateList-Clerks & PollingPlaces.csv`` to be present in
    ``DIRECTORY``. When ``NewHampshireInfo.csv`` is not there yet,
    ``clean_raw_file`` is run on the raw CSV first (presumably it produces
    that file; confirm against ``clean_raw_file``).

    @return: whatever ``data_to_json_schema()`` returns
    @raise WalkTheVoteError: if the prerequisite raw CSV file is missing
    """
    raw_csv = os.path.join(DIRECTORY, "StateList-Clerks & PollingPlaces.csv")
    if not os.path.isfile(raw_csv):
        instructions = (
            f"Prerequisite CSV file needed to scrape New Hampshire. Please go to "
            f"https://app.sos.nh.gov/Public/Reports.aspx and download the CSV file to "
            f"{DIRECTORY}. Make sure the downloaded file is named "
            f'"StateList-Clerks & PollingPlaces.csv". Once downloaded, you\'ll need '
            f"to name the right-most column that is currently unnamed because new "
            f"hampshire SOS staff couldn't be asked to do it.")
        raise WalkTheVoteError(instructions)

    cleaned_csv = os.path.join(DIRECTORY, "NewHampshireInfo.csv")
    if not os.path.isfile(cleaned_csv):
        clean_raw_file(raw_csv)

    return data_to_json_schema()
def __init__(self, db_uri, db_alias):
    """Connect to the database and register every imported scraper.

    Sets ``self.preloaded`` from ``self._is_db_preloaded()``, connects with
    ``connect(db_uri, alias=db_alias)``, then wraps each module returned by
    ``self._get_imported_scrapers()`` in a ``Scraper``.

    @param db_uri: connection URI passed to ``connect``
    @param db_alias: alias passed to ``connect`` and shown in the error message
    @raise WalkTheVoteError: if ``connect`` raises
    @raise AttributeError: if a scraper module's name does not match the
        state-name pattern, or the module has no ``get_election_offices``
    """
    self.preloaded = self._is_db_preloaded()
    self.scrapers = []
    self.failed_scraper_data_retrieval_msgs = []

    try:
        connect(db_uri, alias=db_alias)
    except Exception as e:
        raise WalkTheVoteError(
            f"{Bcolors.FAIL}Problem connecting to database: {db_alias}{Bcolors.ENDC}"
        ) from e

    # The state name is the dotted-path component that directly precedes the
    # "<something>scraper" component of the module name.
    state_name_pattern = re.compile(r"[a-z_]+(?=\.[a-z_]+scraper)")

    # Pair each scraper module's get_election_offices() with its state name.
    for scraper_module in self._get_imported_scrapers():
        state_name = state_name_pattern.search(scraper_module.__name__).group()
        self.scrapers.append(
            Scraper(state_name, scraper_module.get_election_offices))
def format_address_data(address_data, county_name, zip_code, city_name):
    """Tag a raw address string and build the address dict for state "WA".

    ``<br />`` markers are replaced with spaces, a few counties get a
    hard-coded address, and the result is tagged with ``usaddress.tag`` using
    ``electionsaver.addressSchemaMapping``. When the parsed address has no
    city or zip code, ``city_name`` / ``zip_code`` are used instead.

    @param address_data: raw address string to parse
    @param county_name: county name, used for the edge cases, the error
        message and the default ``locationName``
    @param zip_code: fallback zip code; converted with ``str`` so numeric
        values are accepted
    @param city_name: fallback city name (title-cased when used)
    @return: dict that always has ``state``, ``city``, ``zipCode`` and
        ``locationName`` and may have ``streetNumberName``, ``poBox``,
        ``aptNumber``
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this address
    """
    mapping = electionsaver.addressSchemaMapping
    address_data = address_data.replace("<br />", " ")
    # NOTE(review): looks like leftover debugging output - confirm before removing.
    print(county_name, address_data, city_name, zip_code)

    # Edge cases: these counties get a hard-coded address string.
    if county_name == "Benton":
        address_data = "620 Market St"
    if county_name == "Pacific":
        address_data = "300 Memorial Dr, South Bend, 98586"
    if county_name == "Yakima":
        address_data = "128 N. Second Street, Room 117 Yakima, WA 98901-2639"

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Nothing has been parsed when tagging fails, so report the raw input.
        raise WalkTheVoteError(
            f"Error with data for {county_name} county, data is {address_data}"
        ) from e

    final_address = {"state": "WA"}

    if "city" in parsed_data_dict:
        final_address["city"] = parsed_data_dict["city"].title()
    else:
        final_address["city"] = city_name.title()

    if "zipCode" in parsed_data_dict:
        final_address["zipCode"] = parsed_data_dict["zipCode"]
    else:
        # The fallback may not be a string (e.g. a numeric CSV column), so
        # convert it rather than calling a str method on it.
        final_address["zipCode"] = str(zip_code)

    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict[
            "streetNumberName"].title()
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()

    # A parsed location name is kept as-is; only the default is title-cased.
    final_address["locationName"] = parsed_data_dict.get(
        "locationName", f"{county_name} City Election Office".title())

    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"].title()

    return final_address
def create_mapping():
    """Build the zip-code mapping from the zip code database CSV.

    Reads ``zip_code_database.csv`` from ``DIRECTORY`` and produces a dict
    keyed by the zip code formatted with ``:05``. Each value is
    ``{cities: {county: state}}`` where ``cities`` is the primary city followed
    by any acceptable and unacceptable cities (comma separated) and ``state``
    is ``str(us.states.lookup(state))``. The dict is also written to
    ``mapping.json`` in ``DIRECTORY``.

    @return: the mapping dict
    @raise WalkTheVoteError: if the prerequisite CSV file is missing
    """
    csv_path = os.path.join(DIRECTORY, "zip_code_database.csv")
    if not os.path.isfile(csv_path):
        raise WalkTheVoteError(
            f'Prerequisite CSV file needed to create database mapping.\n\nPlease go to '
            f'https://www.unitedstateszipcodes.org/zip-code-database/ and download\nthe'
            f' free zip code database file to {DIRECTORY}.\n\nMake sure the downloaded '
            f'file is named "zip_code_database.csv"')

    mapping_df = pd.read_csv(csv_path)

    column_names = (
        "zip",
        "primary_city",
        "acceptable_cities",
        "unacceptable_cities",
        "state",
        "county",
    )
    rows = zip(*(mapping_df[name].values for name in column_names))

    final_mapping = {}
    for zip_code, primary_city, acceptable, unacceptable, state, county in rows:
        # Start from the primary city and append the optional city lists,
        # skipping cells that are missing.
        city_label = f"{primary_city}"
        if not pd.isna(acceptable):
            city_label += f", {acceptable}"
        if not pd.isna(unacceptable):
            city_label += f", {unacceptable}"

        state_label = str(us.states.lookup(state))
        final_mapping[f"{zip_code:05}"] = {city_label: {county: state_label}}

    with open(os.path.join(DIRECTORY, "mapping.json"), "w") as f:
        json.dump(final_mapping, f)
    return final_mapping
def format_address_data(address_data, county_name):
    """Tag a raw address string and build the address dict for a county.

    The address is tagged with ``usaddress.tag`` using
    ``electionsaver.addressSchemaMapping``. ``city``, ``state`` and ``zipCode``
    are required in the parsed result; the other fields are copied only when
    present.

    @param address_data: raw address string to parse
    @param county_name: county name, used for the edge cases, the error
        message and the default ``locationName``
    @return: dict with ``city``, ``state``, ``zipCode`` and ``locationName``,
        plus ``streetNumberName``, ``poBox``, ``aptNumber`` when parsed
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this address
    @raise KeyError: if the parsed address lacks ``city``, ``state`` or
        ``zipCode``
    """
    mapping = electionsaver.addressSchemaMapping

    # Edge case: "Dr" abbreviates both "Doctor" and "Drive", so spell the
    # title out before tagging.
    if county_name == "Collier":
        address_data = address_data.replace("Rev Dr", "Reverend Doctor")
    # Edge case: the listed address for this county is only a PO Box; the
    # physical location below was found on the county website.
    if county_name == "Citrus":
        address_data = "1500 N. Meadowcrest Blvd. Crystal River, FL 34429"

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Nothing has been parsed when tagging fails, so report the raw input.
        raise WalkTheVoteError(
            f"Error with data for {county_name} county, data is {address_data}"
        ) from e

    final_address = {
        "city": parsed_data_dict["city"],
        "state": parsed_data_dict["state"],
        "zipCode": parsed_data_dict["zipCode"],
    }

    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict["streetNumberName"]
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"]

    final_address["locationName"] = parsed_data_dict.get(
        "locationName", f"{county_name} County Election Office"
    )

    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"]

    return final_address
def format_address_data(address_data, county_name, is_physical, mailing_addr=None):
    """Tag a raw address string and build the address dict for a county.

    The address is tagged with ``usaddress.tag`` using
    ``electionsaver.addressSchemaMapping``. For a physical address, fields
    missing from the parsed result are filled in from ``mailing_addr`` when it
    has them. All copied values are title-cased.

    @param address_data: raw address string to parse
    @param county_name: county name, used for the edge case, the error message
        and the default ``locationName``
    @param is_physical: when truthy, missing fields fall back to
        ``mailing_addr``
    @param mailing_addr: already formatted mailing address dict, or ``None``
        for no fallback
    @return: dict that always has ``locationName`` and may have ``city``,
        ``state``, ``zipCode``, ``streetNumberName``, ``aptNumber``, ``poBox``
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this address
    """
    if mailing_addr is None:
        mailing_addr = {}
    mapping = electionsaver.addressSchemaMapping

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Nothing has been parsed when tagging fails, so report the raw input.
        raise WalkTheVoteError(
            f"Error with data for {county_name} county, data is {address_data}"
        ) from e

    # Edge cases: add missing info.
    if county_name == "SUMTER":
        parsed_data_dict["city"] = "Americus"

    def _resolve(key):
        """Return the title-cased value for key, or None when unavailable."""
        if key in parsed_data_dict:
            return parsed_data_dict[key].title()
        # Sometimes info is only in the mailing address; a physical address
        # borrows it from there.
        if is_physical and key in mailing_addr:
            return mailing_addr[key].title()
        return None

    final_address = {}
    for key in ("city", "state", "zipCode", "streetNumberName"):
        value = _resolve(key)
        if value is not None:
            final_address[key] = value

    location_name = _resolve("locationName")
    if location_name is None:
        location_name = f"{county_name.title()} County Election Office"
    final_address["locationName"] = location_name

    apt_number = _resolve("aptNumber")
    if apt_number is not None:
        final_address["aptNumber"] = apt_number

    # A PO box is never borrowed from the mailing address.
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()

    return final_address