示例#1
0
    def _get_mapping():
        """Load the zip, state, county, city mapping from file or from the script.

        The mapping is read from ``mapping.json`` under
        ``ROOT_DIR/handler/zip_mapping``. When that file does not exist, the
        mapping is built by calling ``create_mapping()`` instead.

        @return: the mapping object produced by ``json.load`` or by
            ``create_mapping()``
        @raise WalkTheVoteError: if ``create_mapping()`` fails, or if reading the
            mapping file fails for any reason other than the file being missing
        """
        # No retry loop: every path below either binds mapping_dict or raises.
        # Looping "until not None" could only spin forever on a file that
        # contains the JSON literal ``null``.
        try:
            with open(
                    os.path.join(ROOT_DIR, "handler", "zip_mapping",
                                 "mapping.json"),
                    "r",
            ) as f:
                mapping_dict = json.load(f)
            print(f"{Bcolors.OKBLUE}Mapping data loaded.{Bcolors.ENDC}\n")
        except FileNotFoundError:
            print(
                f"{Bcolors.OKBLUE}Mapping data file not yet existent in directory. "
                f"Attempting to loading mapping from script instead. This could "
                f"take a while so go grab some coffee.\n ( \n  )\nc[]{Bcolors.ENDC}")
            try:
                mapping_dict = create_mapping()
            except WalkTheVoteError as e:
                # Re-raise with colour codes, keeping the original as the cause.
                raise WalkTheVoteError(
                    f"{Bcolors.FAIL}{e}{Bcolors.ENDC}") from e
            print(f"{Bcolors.OKBLUE}Load successful.{Bcolors.ENDC}")
        except Exception as e:
            raise WalkTheVoteError(
                f"{Bcolors.FAIL}Unknown error loading mapping file{Bcolors.ENDC}"
            ) from e
        return mapping_dict
示例#2
0
def format_address_data(address_data, county_name):
    """Tag a raw address string and convert it into the address schema.

    The state is always set to ``"SC"``. Every other field is copied only when
    the tagger produced it, except ``locationName``, which falls back to a
    default built from ``county_name``.

    @param address_data: raw address text passed to ``usaddress.tag``
    @param county_name: county name, used for the default location name and
        in the error message
    @return: dict with the formatted address fields
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this input
    """
    mapping = electionsaver.addressSchemaMapping

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Report the raw input: nothing has been parsed when tagging fails, so
        # printing the (empty) parsed dict would carry no information.
        raise WalkTheVoteError(
            f"Error with data for {county_name} town, data is {address_data}"
        ) from e

    final_address = {"state": "SC"}

    if "city" in parsed_data_dict:
        final_address["city"] = parsed_data_dict["city"].title()
    if "zipCode" in parsed_data_dict:
        final_address["zipCode"] = parsed_data_dict["zipCode"]
    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict[
            "streetNumberName"].title()
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()
    # A tagged location name is kept verbatim; only the default is title-cased.
    final_address["locationName"] = parsed_data_dict.get(
        "locationName",
        f"{county_name} County Board of Voter Registration & Elections".title(
        ))
    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"].title()
    return final_address
示例#3
0
def format_address_data(address_data, town_name):
    """Tag a raw address string and convert it into the address schema.

    The state is always set to ``"NH"``. Every other field is copied only when
    the tagger produced it, except ``locationName``, which falls back to a
    default built from ``town_name``.

    @param address_data: raw address text passed to ``usaddress.tag``
    @param town_name: town name, used for the default location name and in
        the error message
    @return: dict with the formatted address fields
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this input
    """
    mapping = electionsaver.addressSchemaMapping

    # Edge cases
    # This exact input is replaced with a version that carries a zip code.
    if address_data == "20 PARK ST GORHAM":
        address_data = "20 PARK ST GORHAM 03581"

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Report the raw input: nothing has been parsed when tagging fails, so
        # printing the (empty) parsed dict would carry no information.
        raise WalkTheVoteError(
            f"Error with data for {town_name} town, data is {address_data}"
        ) from e

    final_address = {"state": "NH"}

    if "city" in parsed_data_dict:
        final_address["city"] = parsed_data_dict["city"].title()
    if "zipCode" in parsed_data_dict:
        final_address["zipCode"] = parsed_data_dict["zipCode"]
    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict[
            "streetNumberName"].title()
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()
    # A tagged location name is kept verbatim; only the default is title-cased.
    final_address["locationName"] = parsed_data_dict.get(
        "locationName", f"{town_name} City Election Office".title())
    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"].title()
    return final_address
示例#4
0
    def __init__(self, state):
        """Start a headless browser with the first driver executable found.

        The known drivers are tried in order (chromedriver, then geckodriver).
        Each one is looked up with ``shutil.which``; the first that resolves is
        started with the ``--headless`` argument and stored as
        ``self._primary_driver``.

        @param state: passed to ``self._print_error`` when no driver is found
        @raise WalkTheVoteError: if none of the driver executables is found
        """
        chrome_driver = _Driver(
            name="chromedriver",
            driver_options=chrome.options.Options(),
            driver=webdriver.Chrome,
            install_info="https://sites.google.com/a/chromium.org/chromedriver"
                         "/downloads",
        )
        gecko_driver = _Driver(
            name="geckodriver",
            driver_options=firefox.options.Options(),
            driver=webdriver.Firefox,
            install_info="https://github.com/mozilla/geckodriver/releases",
        )
        self._drivers = [chrome_driver, gecko_driver]

        for candidate in self._drivers:
            # The lookup result is recorded on the candidate even when empty.
            candidate.driver_path = shutil.which(candidate.name)
            if not candidate.driver_path:
                continue
            candidate.driver_options.add_argument("--headless")
            self._primary_driver = candidate.driver(
                executable_path=candidate.driver_path,
                options=candidate.driver_options,
            )
            return

        # Reached only when no candidate resolved to an executable.
        raise WalkTheVoteError(self._print_error(state=state))
def data_to_json_schema():
    """Convert the county elections CSV into a sorted list of office records.

    Reads ``county-elections-departments.csv`` from ``DIRECTORY``, formats each
    row's address with ``format_address_data``, builds one record per row,
    sorts the records by ``countyName``, writes them to ``washington.json`` in
    ``DIRECTORY`` and returns them.

    @return: list of record dicts sorted by ``countyName``
    @raise WalkTheVoteError: if the prerequisite CSV file is not present
    """
    csv_path = os.path.join(DIRECTORY, "county-elections-departments.csv")
    if not os.path.isfile(csv_path):
        raise WalkTheVoteError(
            f"Prerequisite CSV file needed to scrape Washington. Please go to "
            f"https://www.sos.wa.gov/elections/viewauditors.aspx, click export to "
            f"excel and download the CSV file to {DIRECTORY}. Make sure the downloaded "
            f'file is named "county-elections-departments.csv".')
    info_df = pd.read_csv(csv_path, index_col=False)

    county_list = info_df["County"].values
    website_list = info_df["Web"].values
    address_list = info_df["Address"].values
    # City and Zip stay as Series and are indexed by row label below.
    city_list = info_df["City"]
    zip_list = info_df["Zip"]
    email_list = info_df["Email"].values
    phone_list = info_df["Phone"].values

    address_list_formatted = [
        format_address_data(address_list[row], county_list[row], zip_list[row],
                            city_list[row])
        for row in range(len(address_list))
    ]

    master_list = []
    for county, phone, email, website, address in zip(
            county_list, phone_list, email_list, website_list,
            address_list_formatted):
        # A missing website cell stringifies as "nan"; use the state page then.
        if str(website) == "nan":
            website = "https://www.sos.wa.gov/elections/viewauditors.aspx"
        schema = {
            "countyName": county.title(),
            "phone": phone,
            "email": email,
            "website": website,
        }

        # An address with a PO box is stored as mailing, otherwise as physical.
        address_key = "mailingAddress" if "poBox" in address else "physicalAddress"
        schema[address_key] = address

        master_list.append(schema)

    master_list.sort(key=lambda county: county["countyName"])

    with open(os.path.join(DIRECTORY, "washington.json"), "w") as f:
        json.dump(master_list, f)
    return master_list
示例#6
0
 async def _get_scraper_data(scraper) -> str:
     """Await the scraper's ``get_election_office`` and store the result.

     The awaited result is assigned to ``scraper.election_offices``.

     @param scraper: object providing ``get_election_office``, ``state_name``
         and a writable ``election_offices`` attribute
     @return: ``scraper.state_name`` once the data has been stored
     @raise WalkTheVoteError: if awaiting ``get_election_office`` raises
     """
     try:
         offices = await scraper.get_election_office()
         scraper.election_offices = offices
     except Exception as err:
         raise WalkTheVoteError(
             f"{Bcolors.WARNING}Problem getting election office data from "
             f"{scraper.state_name}_scraper.py: {err}{Bcolors.ENDC}")
     return scraper.state_name
示例#7
0
async def get_election_offices():
    """Return the election office data built from the downloaded CSV.

    Requires ``StateList-Clerks & PollingPlaces.csv`` in ``DIRECTORY``. When
    ``NewHampshireInfo.csv`` is not present there, ``clean_raw_file`` is run on
    the raw CSV first (presumably it produces that file; confirm against
    ``clean_raw_file``).

    @return: whatever ``data_to_json_schema()`` returns
    @raise WalkTheVoteError: if the prerequisite CSV file is not present
    """
    raw_csv = os.path.join(DIRECTORY, "StateList-Clerks & PollingPlaces.csv")
    if not os.path.isfile(raw_csv):
        missing_file_msg = (
            f"Prerequisite CSV file needed to scrape New Hampshire. Please go to "
            f"https://app.sos.nh.gov/Public/Reports.aspx and download the CSV file to "
            f"{DIRECTORY}. Make sure the downloaded file is named "
            f'"StateList-Clerks & PollingPlaces.csv". Once downloaded, you\'ll need '
            f"to name the right-most column that is currently unnamed because new "
            f"hampshire SOS staff couldn't be asked to do it.")
        raise WalkTheVoteError(missing_file_msg)

    cleaned_csv = os.path.join(DIRECTORY, "NewHampshireInfo.csv")
    if not os.path.isfile(cleaned_csv):
        clean_raw_file(raw_csv)

    return data_to_json_schema()
示例#8
0
    def __init__(self, db_uri, db_alias):
        """Connect to the database and register one Scraper per scraper module.

        @param db_uri: passed to ``connect`` as the connection target
        @param db_alias: passed to ``connect`` as ``alias``; also named in the
            error message
        @raise WalkTheVoteError: if ``connect`` raises
        """
        # NOTE(review): the preloaded check runs before connect(); confirm
        # that _is_db_preloaded() does not need the connection.
        self.preloaded = self._is_db_preloaded()
        self.scrapers = []
        self.failed_scraper_data_retrieval_msgs = []

        try:
            connect(db_uri, alias=db_alias)
        except Exception as err:
            raise WalkTheVoteError(
                f"{Bcolors.FAIL}Problem connecting to database: {db_alias}{Bcolors.ENDC}"
            ) from err

        # The state name is the run of lowercase letters/underscores that sits
        # directly before the ".<...>scraper" part of the module's dotted name.
        state_pattern = re.compile(r"[a-z_]+(?=\.[a-z_]+scraper)")

        # Pair each module's get_election_offices function with its state name
        for scraper_module in self._get_imported_scrapers():
            state_name = state_pattern.search(scraper_module.__name__).group()
            fetch_offices = scraper_module.get_election_offices
            self.scrapers.append(Scraper(state_name, fetch_offices))
def format_address_data(address_data, county_name, zip_code, city_name):
    """Tag a raw address string and convert it into the address schema.

    The state is always set to ``"WA"``. ``city`` and ``zipCode`` fall back to
    the separately supplied ``city_name`` / ``zip_code`` when the tagger did
    not find them; ``locationName`` falls back to a default built from
    ``county_name``. Other fields are copied only when tagged.

    @param address_data: raw address text; ``<br />`` markers are replaced by
        spaces before tagging
    @param county_name: county name, used for edge cases, the default
        location name and the error message
    @param zip_code: fallback zip code (string or number)
    @param city_name: fallback city name
    @return: dict with the formatted address fields
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this input
    """
    mapping = electionsaver.addressSchemaMapping

    address_data = address_data.replace("<br />", " ")
    print(county_name, address_data, city_name, zip_code)

    # Edge cases: these counties get a hard-coded address instead of the input
    if county_name == "Benton":
        address_data = "620 Market St"
    if county_name == "Pacific":
        address_data = "300 Memorial Dr, South Bend, 98586"
    if county_name == "Yakima":
        address_data = "128 N. Second Street, Room 117 Yakima, WA 98901-2639"

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Report the raw input: nothing has been parsed when tagging fails, so
        # printing the (empty) parsed dict would carry no information.
        raise WalkTheVoteError(
            f"Error with data for {county_name} town, data is {address_data}"
        ) from e

    final_address = {"state": "WA"}

    if "city" in parsed_data_dict:
        final_address["city"] = parsed_data_dict["city"].title()
    else:
        final_address["city"] = city_name.title()
    if "zipCode" in parsed_data_dict:
        final_address["zipCode"] = parsed_data_dict["zipCode"]
    else:
        # The fallback may arrive as a number (e.g. a numeric CSV column),
        # which has no str methods; convert instead of calling .title() on it.
        final_address["zipCode"] = str(zip_code)
    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict[
            "streetNumberName"].title()
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()
    # A tagged location name is kept verbatim; only the default is title-cased.
    final_address["locationName"] = parsed_data_dict.get(
        "locationName", f"{county_name} City Election Office".title())
    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"].title()
    return final_address
示例#10
0
def create_mapping():
    """Build the zip-code mapping from the zip code database CSV.

    Each row of ``zip_code_database.csv`` (in ``DIRECTORY``) becomes one entry
    keyed by the zip code zero-padded to five characters. The entry maps a
    comma-separated city string (primary city, then any acceptable and
    unacceptable cities) to ``{county: str(us.states.lookup(state))}``. The
    result is also written to ``mapping.json`` in ``DIRECTORY``.

    @return: dict holding the mapping described above
    @raise WalkTheVoteError: if the prerequisite CSV file is not present
    """
    csv_path = os.path.join(DIRECTORY, "zip_code_database.csv")
    if not os.path.isfile(csv_path):
        raise WalkTheVoteError(
            f'Prerequisite CSV file needed to create database mapping.\n\nPlease go to '
            f'https://www.unitedstateszipcodes.org/zip-code-database/ and download\nthe'
            f' free zip code database file to {DIRECTORY}.\n\nMake sure the downloaded '
            f'file is named "zip_code_database.csv"')
    mapping_df = pd.read_csv(csv_path)

    column_names = (
        "zip",
        "primary_city",
        "acceptable_cities",
        "unacceptable_cities",
        "state",
        "county",
    )
    columns = [mapping_df[name].values for name in column_names]

    final_mapping = {}
    for zip_code, primary_city, acceptable, unacceptable, state, county in zip(
            *columns):
        # Optional city lists are appended only when the cell is not missing.
        city_parts = [f"{primary_city}"]
        for extra_cities in (acceptable, unacceptable):
            if not pd.isna(extra_cities):
                city_parts.append(f"{extra_cities}")
        cities_key = ", ".join(city_parts)

        state_name = str(us.states.lookup(state))
        # A zip code seen more than once keeps only its last row.
        final_mapping[f"{zip_code:05}"] = {cities_key: {county: state_name}}

    with open(os.path.join(DIRECTORY, "mapping.json"), "w") as f:
        json.dump(final_mapping, f)
    return final_mapping
示例#11
0
def format_address_data(address_data, county_name):
    """Tag a raw address string and convert it into the address schema.

    ``city``, ``state`` and ``zipCode`` are required in the tagged result;
    ``locationName`` falls back to a default built from ``county_name``. Other
    fields are copied only when tagged. Values are copied without changing
    their case.

    @param address_data: raw address text passed to ``usaddress.tag``
    @param county_name: county name, used for edge cases, the default
        location name and the error message
    @return: dict with the formatted address fields
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this input
    @raise KeyError: if the tagged result lacks ``city``, ``state`` or
        ``zipCode``
    """
    mapping = electionsaver.addressSchemaMapping

    # edge cases

    # lol doctor and drive have the same abbreviation
    if county_name == "Collier":
        address_data = address_data.replace("Rev Dr", "Reverend Doctor")

    # this county only has a PO Box, and I happened to click on the website
    # and find out there's an actual physical location lol.. got lucky
    if county_name == "Citrus":
        address_data = "1500 N. Meadowcrest Blvd. Crystal River, FL 34429"

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Report the raw input: nothing has been parsed when tagging fails, so
        # printing the (empty) parsed dict would carry no information.
        raise WalkTheVoteError(
            f"Error with data for {county_name} county, data is {address_data}"
        ) from e

    final_address = {
        "city": parsed_data_dict["city"],
        "state": parsed_data_dict["state"],
        "zipCode": parsed_data_dict["zipCode"],
    }
    if "streetNumberName" in parsed_data_dict:
        final_address["streetNumberName"] = parsed_data_dict["streetNumberName"]
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"]
    final_address["locationName"] = parsed_data_dict.get(
        "locationName", f"{county_name} County Election Office"
    )
    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"]
    return final_address
示例#12
0
def format_address_data(address_data,
                        county_name,
                        is_physical,
                        mailing_addr=None):
    """Tag a raw address string and convert it into the address schema.

    All copied values are title-cased. For a physical address
    (``is_physical`` true), any field missing from the tagged result other
    than ``poBox`` is taken from ``mailing_addr`` when present there.
    ``locationName`` finally falls back to a default built from
    ``county_name``.

    @param address_data: raw address text passed to ``usaddress.tag``
    @param county_name: county name, used for edge cases, the default
        location name and the error message
    @param is_physical: whether missing fields may be filled from
        ``mailing_addr``
    @param mailing_addr: already formatted mailing address dict used as the
        fallback source; treated as empty when ``None``
    @return: dict with the formatted address fields
    @raise WalkTheVoteError: if ``usaddress.tag`` raises for this input
    """
    if mailing_addr is None:
        mailing_addr = {}
    mapping = electionsaver.addressSchemaMapping

    try:
        parsed_data_dict = usaddress.tag(address_data, tag_mapping=mapping)[0]
    except Exception as e:
        # Report the raw input: nothing has been parsed when tagging fails, so
        # printing the (empty) parsed dict would carry no information.
        raise WalkTheVoteError(
            f"Error with data for {county_name} county, data is {address_data}"
        ) from e

    final_address = {}

    # Edge cases: add missing info.
    if county_name == "SUMTER":
        parsed_data_dict["city"] = "Americus"

    # Sometimes info is only in the mailing address; if it is missing from the
    # physical address, take it from the mailing one.
    # NOTE(review): .title() turns a two-letter state such as "GA" into "Ga";
    # left unchanged here, confirm which form downstream consumers expect.
    for key in ("city", "state", "zipCode", "streetNumberName"):
        if key in parsed_data_dict:
            final_address[key] = parsed_data_dict[key].title()
        elif is_physical and key in mailing_addr:
            final_address[key] = mailing_addr[key].title()

    if "locationName" in parsed_data_dict:
        final_address["locationName"] = parsed_data_dict[
            "locationName"].title()
    elif is_physical and "locationName" in mailing_addr:
        final_address["locationName"] = mailing_addr["locationName"].title()
    else:
        final_address[
            "locationName"] = f"{county_name.title()} County Election Office"

    if "aptNumber" in parsed_data_dict:
        final_address["aptNumber"] = parsed_data_dict["aptNumber"].title()
    elif is_physical and "aptNumber" in mailing_addr:
        final_address["aptNumber"] = mailing_addr["aptNumber"].title()

    # A PO box is never borrowed from the mailing address.
    if "poBox" in parsed_data_dict:
        final_address["poBox"] = parsed_data_dict["poBox"].title()
    return final_address