/
passport_geocoder.py
75 lines (59 loc) · 2.56 KB
/
passport_geocoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import csv
import geocoder
# Geocode a CSV of passport acceptance facilities.
#
# Every data row of INPUTFILE is copied to OUTPUT with two extra columns,
# "lat" and "lon".  Each address is tried against Yahoo first, then Google,
# then OpenStreetMap; rows that none of them can resolve get "x" in both
# columns.  Results are buffered in memory and written once at the end.

# CSV file to read.
INPUTFILE = "C:\\Users\\nathanhilbert\\workspace\\Passports\\Raw Data\\Acceptance Facilities.csv"
# True when the first row of INPUTFILE holds the column names.  The address
# columns are looked up by name, so a header row is required in practice.
HAS_HEADERS = True
# CSV file to write.
OUTPUT = "C:\\Users\\nathanhilbert\\workspace\\Passports\\CleanedData\\Acceptance Facilities.csv"
# When True, print the first provider's response, the address being looked
# up, and the coordinates that were accepted.
DEBUG = True
# Intended debug row cap (stop after LIMIT rows); not enforced by this script.
LIMIT = 10

# Input columns that make up the geocoding query, in query order.
ADDRESS_FIELDS = ("Address", "City", "State", "Zip Code")


def build_address(record):
    """Return the comma-separated query string for one input record.

    Args:
        record: mapping of column name to cell text for a single row.

    Raises:
        KeyError: if any column named in ADDRESS_FIELDS is missing.
    """
    return ",".join(record[field] for field in ADDRESS_FIELDS)


def geocode_address(address, providers=None, debug=False):
    """Try each provider in order and return the first usable coordinates.

    Args:
        address: query string passed unchanged to every provider.
        providers: callables taking the address and returning an object with
            ``lat`` and ``lng`` attributes.  Defaults to Yahoo, Google and
            OSM from the ``geocoder`` package, in that order.
        debug: when True, print diagnostics.

    Returns:
        ``[lat, lng]`` from the first provider whose result has a truthy
        ``lat`` and ``lng``, or ``None`` when every provider fails.
    """
    if providers is None:
        # Resolved lazily so the helpers can be used with stand-in providers
        # without the geocoder package being touched.
        providers = (geocoder.yahoo, geocoder.google, geocoder.osm)

    for attempt, provider in enumerate(providers):
        result = provider(address)
        if debug and attempt == 0:
            # Only the first provider's raw response is echoed.
            print(result)
            # The double space matches the script's established debug output.
            print("looking at  " + address)
        # Truthiness (not "is not None") is deliberate: a missing coordinate
        # may be reported as None, '' or 0 depending on the provider.
        if result.lat and result.lng:
            if debug:
                print("%s %s" % (result.lat, result.lng))
            return [result.lat, result.lng]
    return None


def geocode_rows(rows, has_headers=True, providers=None, debug=False):
    """Append coordinates to every data row.

    Args:
        rows: iterable of rows, each a list of cell strings.
        has_headers: when True the first row supplies the column names and is
            emitted with "lat" and "lon" appended.
        providers: forwarded to geocode_address.
        debug: forwarded to geocode_address.

    Returns:
        ``(output_rows, tally)`` where ``tally`` is a dict with the keys
        "success" and "fails".

    Raises:
        KeyError: if a row has no value for one of ADDRESS_FIELDS.  This is
            always the case when ``has_headers`` is False, because no column
            names are known then.
    """
    headers = []
    output_rows = []
    tally = {"success": 0, "fails": 0}

    for index, row in enumerate(rows):
        if index == 0 and has_headers:
            headers = row
            output_rows.append(row + ["lat", "lon"])
            continue

        address = build_address(dict(zip(headers, row)))
        coordinates = geocode_address(address, providers=providers, debug=debug)
        if coordinates is None:
            tally["fails"] += 1
            print("there was an error with " + address)
            # Keep the row so the output lines up with the input.
            output_rows.append(row + ["x", "x"])
        else:
            tally["success"] += 1
            output_rows.append(row + coordinates)

    return output_rows, tally


def open_csv(path, mode):
    """Open ``path`` the way the csv module expects on this interpreter.

    Args:
        path: file to open.
        mode: "r" or "w".

    Returns:
        An open file object: binary mode on Python 2, text mode with
        ``newline=""`` on Python 3.
    """
    import sys

    if sys.version_info[0] >= 3:
        return open(path, mode, newline="")
    return open(path, mode + "b")


def main():
    """Read INPUTFILE, geocode every row, report totals and write OUTPUT."""
    with open_csv(INPUTFILE, "r") as csvinput:
        reader = csv.reader(csvinput, delimiter=",")
        output_rows, tally = geocode_rows(reader, HAS_HEADERS, debug=DEBUG)

    print("Success: %s" % tally["success"])
    print("Fails: %s" % tally["fails"])

    with open_csv(OUTPUT, "w") as csvoutput:
        csv.writer(csvoutput, delimiter=",").writerows(output_rows)


if __name__ == "__main__":
    main()