Пример #1
0
	#The Authority name is uniform across counties.

	if county_name == "Westchester":
		fax = "914-995-3190, 914-995-7753"
		review = review + "b"
	else:
		fax = dogcatcher.find_phone(fax_re, county)

	official_name = official_name_re.findall(county)[0]
	if "<br>" in official_name.lower():
		print county
		print official_name
		sys.exit()

	first_name, last_name, review = dogcatcher.split_name(official_name, review)

	#This section finds the full address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
    #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
    #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state.

	address = address_re.findall(county)[0]

	print address

	csz = csz_re.findall(address)[0]

	if po_re.findall(address):
		po_street = " ".join(po_re.findall(address)[0].replace("<br>","").strip(", ").split())

	street = address.replace(po_street,"").replace(csz,"").replace("<br>",", ").replace("<BR>",", ").replace("<Br>",", ")
Пример #2
0
county_data = county_data_re.findall(data)

print "County number ", county_data.length

for county in county_data:

    authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(
        voter_state)

    county_data_item = county_data_item_re.findall(county)
    county_name = county_name_re.findall(county)[0]

    official_name = name_re.findall(county)[0].replace("\r\n", "").lstrip()

    first_name, last_name, review = dogcatcher.split_name(
        official_name, review)

    #NC gives two addresses: a mailing address and a street, each formatted "Streeet <br /> City, State Zip". The mailing address may be identical to thes treet address.
    #This gets the address by running a RE to grab each and split it at the "<br />".
    #It then checks whether the mailing and non-mailing addresses are identical. If not

    po_address = " ".join(county_data_item[1].replace(
        "\r\n", "").split()).partition("<br />")
    address = " ".join(county_data_item[2].replace(
        "\r\n", "").split()).partition("<br />")
    print "__________________________________________________"

    if po_address == address:
        address_state = state_re.findall(address[2])[0].strip()
        city = city_re.findall(address[2])[0].strip()
        zip_code = zip_re.findall(address[2])[0].strip()
Пример #3
0
	authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(voter_state)

	county_name = county_names[county_list.index(county)].title().replace(" County","")

	authority_name = "Voter Registration Office"

	print "______________________________________________________________________\n" + county_name


	try:
		name = name_re.findall(county)[0]
	except:
		name = ""

	first_name, last_name, review = dogcatcher.split_name(name, review)

	mailing_address = mailing_address_re.findall(county)[0]

	try:
		po_street = po_re.findall(mailing_address)[0].strip()
	except:
		po_street = ""

	street = ""

	for item in street_re.findall(mailing_address):
		street = (street + ", " + item).strip(", ")

	street = street.replace(po_street, "").replace(", , ",", ").strip(", ")
Пример #4
0
	website = dogcatcher.find_website(website_re, county_web)
	reg_website = website


	email = dogcatcher.find_emails(email_re, absentee)
	reg_email = dogcatcher.find_emails(email_re, registration)

	phone = dogcatcher.find_phone(phone_re, absentee)
	reg_phone = dogcatcher.find_phone(phone_re, registration)


	# print absentee
	# print registration
	print name_re.findall(absentee)
	absentee_official = name_re.findall(absentee)[0].replace("</div>","") #In one county, the regular expression used yields </div> as a response. The other easy fix creates more problems, so we just remove the </div>.
	first_name, last_name, review = dogcatcher.split_name(absentee_official, review)

	if absentee_official:
		authority_name = direct_re.findall(absentee)[2].replace(county_name + " County","").replace(county_name + " Co","").replace(county_name,"").replace(" . "," ").strip(", .")
	else:
		authority_name = direct_re.findall(absentee)[1].replace(county_name + " County","").replace(county_name + " Co","").replace(county_name,"").replace(" . "," ").strip(", .")


	reg_official = name_re.findall(registration)[0].replace("</div>","") #In one county, the regular expression used yields </div> as a response. The other easy fix creates more problems, so we just remove the </div>.
	reg_first, reg_last, review = dogcatcher.split_name(reg_official, review)

	print ["0", reg_official]
	print ["1", absentee]
	print ["2", regweb]

	if reg_official and direct_re.findall(registration):
Пример #5
0
#The page text is passed through dogcatcher.po_standardize before being split into counties.
data = dogcatcher.po_standardize(data)

county_data = county_data_re.findall(data)

#In each county, there are separate offices for registration and absentee ballots. This separates those offices and then applies essentially identical procedures to both.
for county in county_data:
	#Assign every per-county field from dogcatcher.begin(voter_state).
	(authority_name, first_name, last_name, county_name, town_name, fips,
		street, city, address_state, zip_code,
		po_street, po_city, po_state, po_zip_code,
		reg_authority_name, reg_first, reg_last,
		reg_street, reg_city, reg_state, reg_zip_code,
		reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code,
		reg_phone, reg_fax, reg_email, reg_website, reg_hours,
		phone, fax, email, website, hours, review) = dogcatcher.begin(voter_state)


	county_name = county_name_re.findall(county)[0]

	#This isolates the county clerk data from the complete county.
	clerk = clerk_re.findall(county)[0]

	clerk_name = name_re.findall(clerk)[0]
	first_name, last_name, review = dogcatcher.split_name(clerk_name, review)

	#Phone, fax, and website are all looked up inside the clerk's section only.
	phone = dogcatcher.find_phone(phone_re, clerk)

	fax = dogcatcher.find_phone(fax_re, clerk)

	website = dogcatcher.find_website(website_re, clerk)

	#For the hours, each "<br>" followed by a newline becomes a space and runs of whitespace collapse to single spaces.
	raw_hours = hours_re.findall(clerk)[0]
	hours = " ".join(raw_hours.replace("<br>\n"," ").split())

	#It's hard to get the address without also getting the clerk's name.
	#So we first find the address, remove the clerk's name, and clean up a few html tags.
	#That can leave a mess of commas, so we clean that up.
	#We then extract the City, State, and Zip (CSZ) and check for a PO Box.
	#We then remove the CSZ and PO box from the address to form the street, and check whether it exists.
	#We then check whether there's anything left to be a street address. If there is, we clean it and trim it down to one line.
Пример #6
0
    authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(
        voter_state)

    county_name = county_names[county_list.index(county)].title().replace(
        " County", "")

    authority_name = "Voter Registration Office"

    print "______________________________________________________________________\n" + county_name

    try:
        name = name_re.findall(county)[0]
    except:
        name = ""

    first_name, last_name, review = dogcatcher.split_name(name, review)

    mailing_address = mailing_address_re.findall(county)[0]

    try:
        po_street = po_re.findall(mailing_address)[0].strip()
    except:
        po_street = ""

    street = ""

    for item in street_re.findall(mailing_address):
        street = (street + ", " + item).strip(", ")

    street = street.replace(po_street, "").replace(", , ", ", ").strip(", ")
Пример #7
0
	if county_name_re.findall(county)[0].strip() != county_name_re.findall(reg_county)[0].strip():
		print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
		print "The lists don't match. Breaking the code."
		print county, reg_county
		print county_name_re.findall(county)[0], county_name_re.findall(reg_county)[0]
		print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
		sys.exit()

	authority_name = county_data_item[0]
	reg_authority_name = reg_county_data_item[0]

	county_name = county_name_re.findall(county)[0].title().replace(" County","")

	official_name = county_data_item[1]
	first_name, last_name, review = dogcatcher.split_name(official_name, review)

	reg_official_name = reg_county_data_item[1]
	reg_first, reg_last, review = dogcatcher.split_name(reg_official_name, review)

	fax = dogcatcher.find_phone(fax_re, county)

	reg_fax = dogcatcher.find_phone(reg_fax_re, reg_county)

	phone = dogcatcher.find_phone(phone_re, county)

	reg_phone = dogcatcher.find_phone(reg_phone_re, reg_county)

	print "_____________________________________"

	#This section finds the address for the absentee official.