def mkdict(row):
    """Turn a geocoder response row into a well-formed dict.

    The values of ``row`` are paired with the names in ``geocoder_header``.
    Rows with more than three values are expanded further:

    * ``latlon`` is split on the comma into ``lat`` and ``lon``; both are set
      to the empty string when the value does not split into exactly two parts.
    * ``tract_geoid`` is the string form of a ``Tract`` built from the integer
      values of ``state_fips``, ``county_fips`` and ``tract_fips``.
    * ``latlon`` is then removed from the result.

    Any other failure during that expansion is reported on stderr, and the
    record is returned with an empty ``input_address`` and ``match`` set to
    ``'Parse Error'``.
    """
    record = dict(zip(geocoder_header, row))

    # Short rows carry nothing to expand; hand them back untouched.
    if len(row) <= 3:
        return record

    try:
        try:
            lat, lon = record['latlon'].split(',')
        except ValueError:
            lat, lon = '', ''
        record['lat'], record['lon'] = lat, lon

        state, county, tract = (
            int(record[key])
            for key in ('state_fips', 'county_fips', 'tract_fips')
        )
        record['tract_geoid'] = str(Tract(state, county, tract))

        # Dropping the combined field is best effort.
        record.pop('latlon', None)

    except Exception as exc:
        # These appear to be errors associated with quote characters in the addresses, like
        # 366426709,"8430 I AVENUE""", HESPERIA, CA," 92345""",No_Match. There aren't many of
        # them, but they are a problem
        print("ERROR for ", row, exc, file=sys.stderr)
        record['input_address'] = ''
        record['match'] = 'Parse Error'

    return record
def parse_tiger_tract(self, v):
    """Parse a tract identifier into a ``geoid.census.Tract``.

    A numeric ``v`` (float or int) is converted to an int and left-padded
    with zeros to 11 characters before parsing. Any other value is passed
    to ``Tract.parse`` unchanged.
    """
    from geoid.census import Tract

    is_numeric = isinstance(v, (float, int))
    tract_code = str(int(v)).rjust(11, '0') if is_numeric else v

    return Tract.parse(tract_code)
def extract_geoid(self, v, row):
    """Build a ``GeoAcsVT`` for the geography described by ``row``.

    ``row.geotype`` selects the kind of geography and ``row.geotypevalue``
    supplies its code. The matching ``geoid.census`` object is converted to
    a ``GVid`` and wrapped in ``GeoAcsVT``. ``v`` is not used.

    Returns ``None`` for the non-census geotypes ('NA', 'RE', 'R4', 'MS')
    and for unknown geotypes, which are also reported through
    ``self.error``.
    """
    from geoid.census import Place, County, State, Cosub, Tract, Zcta
    from geoid.civick import GVid
    from ambry.valuetype import FailedValue, GeoAcsVT

    CA_STATE = 6

    geotype = row.geotype
    gvid = None

    if geotype == 'PL':
        gvid = Place(CA_STATE, int(row.geotypevalue)).convert(GVid)

    elif geotype == 'CO':
        # The first two characters are the state code, the rest the county.
        county_code = row.geotypevalue
        assert int(county_code[0:2]) == CA_STATE
        gvid = County(CA_STATE, int(county_code[2:])).convert(GVid)

    elif geotype in ('CA', 'ST'):
        gvid = State(CA_STATE).convert(GVid)

    elif geotype == 'CD':
        gvid = Cosub.parse(row.geotypevalue).convert(GVid)

    elif geotype == 'CT':
        try:
            gvid = Tract.parse(row.geotypevalue).convert(GVid)
        except ValueError:
            # Retry with the '06' state prefix added to the value.
            gvid = Tract.parse('06' + row.geotypevalue).convert(GVid)

    elif geotype == 'ZC':
        gvid = Zcta.parse(row.geotypevalue).convert(GVid)

    elif geotype not in ('NA', 'RE', 'R4', 'MS'):
        # 'NA', 'RE' and 'R4' are sub-state regions, not census areas, and
        # 'MS' is probably an MSA or similar; anything else is unexpected.
        self.error("Unknown geotype {} in row {}".format(geotype, row))

    return None if gvid is None else GeoAcsVT(gvid)
def extract_geoid(self, v, row):
    """Build an ``ambry.valuetype.Geoid`` for the geography described by ``row``.

    ``row.geotype`` selects the kind of geography and ``row.geotypevalue``
    supplies its code. The matching ``geoid.census`` object is converted to
    a ``GVid`` and wrapped in ``ambry.valuetype.Geoid``. ``v`` is not used.

    Returns ``ambry.valuetype.FailedValue(None)`` for the non-census
    geotypes ('NA', 'RE', 'R4', 'MS') and for unknown geotypes, which are
    also reported through ``self.error``.
    """
    from geoid.census import Place, County, State, Cosub, Tract, Zcta
    from geoid.civick import GVid
    import ambry.valuetype

    CA_STATE = 6

    def _lookup_gvid():
        # Resolve the row to a GVid, or None when there is no census geoid.
        kind = row.geotype

        if kind == 'PL':
            return Place(CA_STATE, int(row.geotypevalue)).convert(GVid)

        if kind == 'CO':
            # The first two characters are the state code, the rest the county.
            code = row.geotypevalue
            assert int(code[0:2]) == CA_STATE
            return County(CA_STATE, int(code[2:])).convert(GVid)

        if kind == 'CA' or kind == 'ST':
            return State(CA_STATE).convert(GVid)

        if kind == 'CD':
            return Cosub.parse(row.geotypevalue).convert(GVid)

        if kind == 'CT':
            try:
                return Tract.parse(row.geotypevalue).convert(GVid)
            except ValueError:
                # Retry with the '06' state prefix added to the value.
                return Tract.parse('06' + row.geotypevalue).convert(GVid)

        if kind == 'ZC':
            return Zcta.parse(row.geotypevalue).convert(GVid)

        if kind in ('NA', 'RE', 'R4'):
            # Sub-state region, not a census area
            return None

        if kind == 'MS':
            # Probably an MSA or similar
            return None

        self.error("Unknown geotype {} in row {}".format(kind, row))
        return None

    gvid = _lookup_gvid()

    if gvid is None:
        return ambry.valuetype.FailedValue(None)

    return ambry.valuetype.Geoid(gvid)
from geoid.census import Tract

# Fill in the tract geoid and its state / county / tract parts on every row
# produced by chunked_geocode().
for row_n, was_geocoded, row in chunked_geocode(doc):

    if not row.get('tract_geoid'):
        # No tract on the row: look one up from the zip recorded for this
        # unique id, and blank the fields that the lookup does not supply.
        unique_id = int(row['unique_id'])
        row['tract_geoid'] = zip_to_tract(fac_zip[unique_id], unique_id)
        row['side_of_street'] = row['tiger_id'] = None

    tract_geoid = row['tract_geoid']

    if tract_geoid:
        if len(tract_geoid) != 18:
            # It's probably still a Census Tract, so convert it to an Acs tract.
            census_tract = CensusTract.parse(tract_geoid.zfill(11))
            tract_geoid = str(census_tract.convert(AcsTract))
            row['tract_geoid'] = tract_geoid

        assert len(tract_geoid) == 18, tract_geoid

        t = AcsTract.parse(tract_geoid)

        for column, attribute in (('state_fips', 'state'),
                                  ('county_fips', 'county'),
                                  ('tract_fips', 'tract')):
            row[column] = getattr(t, attribute)

    state_fips = row.get('state_fips')
    if state_fips:
        # Store the state code as a zero-padded, two character string.
        row['state_fips'] = str(state_fips).zfill(2)