def load_zccd(fn):
    """Read a ZCTA-to-congressional-district CSV into a list of row dicts."""
    # The national and state-specific files label the district column
    # differently, so both spellings funnel into the same output key.
    wanted_columns = {
        'State': 'state_fips',
        'ZCTA': 'zcta',
        'Congressional District': 'cd',
        'CongressionalDistrict': 'cd',
    }
    # skip=1 drops the file's header/banner row before the real data
    return utils.load_csv_columns(fn, wanted_columns, skip=1)
def load_fips(fn):
    """Load the census state FIPS file and return {state_fips: state_abbr}.

    The source file is pipe-delimited; only the numeric state code (STATE)
    and the two-letter USPS abbreviation (STUSAB) columns are used.
    """
    column_map = {
        'STATE': 'state_fips',
        'STUSAB': 'state',
    }
    fips_data = utils.load_csv_columns(fn, column_map, delimiter='|')
    # Dict comprehension replaces the manual accumulation loop; a later row
    # with a duplicate state_fips still wins, same as the original loop.
    return {row['state_fips']: row['state'] for row in fips_data}
def test_hud():
    """Diff our generated zccd.csv against the HUD zip->CD crosswalk.

    Prints side-by-side counts (ours vs HUD) of rows, states, and zipcodes,
    then reports every zipcode whose set of congressional districts differs
    between the two files.  Asserts that HUD covers no state we lack.
    """
    # column header for the two-value count lines printed below
    print "\t us, hud"
    our_data = load_csv_columns('zccd.csv')
    hud_data = load_csv_columns('zccd_hud.csv')
    print "length", len(our_data), len(hud_data)
    print
    # state coverage: every state present in HUD must be present in ours
    our_states = list_key_set(our_data, 'state_abbr')
    hud_states = list_key_set(hud_data, 'state_abbr')
    print "states", len(our_states), len(hud_states)
    assert len(hud_states.difference(our_states)) == 0
    print "we added", our_states.difference(hud_states)
    print
    # zipcode coverage: HUD keys its rows on 'zip', we key ours on 'zcta'
    our_zctas = list_key_set(our_data, 'zcta')
    hud_zips = list_key_set(hud_data, 'zip')
    print "ZCTAs", len(our_zctas), len(hud_zips)
    # we should not be missing any hud zctas
    print "we added", len(our_zctas.difference(hud_zips))
    print "missing", len(hud_zips.difference(our_zctas))
    print sorted(list(hud_zips.difference(our_zctas)))
    print
    # group rows by zipcode, then diff the district set per zipcode
    # NOTE(review): hud_zip_list[n] is indexed for every one of our zctas,
    # including ones "we added" -- presumably list_key_values returns a
    # mapping that tolerates missing keys (e.g. a defaultdict); confirm,
    # otherwise an added zcta would raise KeyError here.
    our_zcta_list = list_key_values(our_data, 'zcta')
    hud_zip_list = list_key_values(hud_data, 'zip')
    cds_changed = 0
    states_changed = set()
    for (n, l) in sorted(our_zcta_list.items()):
        our_cd_set = list_key_set(l, 'cd')
        hud_cd_set = list_key_set(hud_zip_list[n], 'cd')
        if hud_cd_set.symmetric_difference(our_cd_set):
            # the two files disagree about this zipcode's district(s)
            cds_changed += 1
            our_state = list_key_values(l, 'state_abbr')
            hud_state = list_key_values(hud_zip_list[n], 'state_abbr')
            #print "%s in %s-%s hud %s-%s" % (n, ','.join(our_state), ','.join(our_cd_set), ','.join(hud_state), ','.join(hud_cd_set))
            states_changed.update(our_state.keys())
            states_changed.update(hud_state.keys())
    print "CDs differing", cds_changed
    print "from states", states_changed
    print
def test_sunlight():
    """Diff our generated zccd.csv against the old Sunlight districts CSV.

    Prints side-by-side counts (new vs old) of rows, states, and zipcodes,
    then reports every zipcode whose set of congressional districts changed.
    Asserts the state sets are identical and that no old zcta was dropped.
    """
    # column header for the two-value count lines printed below
    print "\t new, old"
    new_data = load_csv_columns('zccd.csv')
    old_data = load_csv_columns('raw/old_sunlight_districts.csv')
    print "length", len(new_data), len(old_data)
    print
    # state coverage must match in both directions (note differing column names)
    new_states = list_key_set(new_data, 'state_abbr')
    old_states = list_key_set(old_data, 'state')
    print "states", len(new_states), len(old_states)
    assert len(old_states.difference(new_states)) == 0
    assert len(new_states.difference(old_states)) == 0
    print
    # zipcode coverage: the old file keys on 'zipcode', ours on 'zcta'
    new_zctas = list_key_set(new_data, 'zcta')
    old_zctas = list_key_set(old_data, 'zipcode')
    print "ZCTAs", len(new_zctas), len(old_zctas)
    # we should not be missing any old zctas
    assert len(old_zctas.difference(new_zctas)) == 0
    print "added", new_zctas.difference(old_zctas)
    print
    # group rows by zipcode, then diff the district set per zipcode
    # NOTE(review): old_zcta_list[n] is indexed for every new zcta, including
    # the "added" ones -- presumably list_key_values tolerates missing keys
    # (e.g. a defaultdict); confirm, otherwise this would raise KeyError.
    new_zcta_list = list_key_values(new_data, 'zcta')
    old_zcta_list = list_key_values(old_data, 'zipcode')
    cds_changed = 0
    states_changed = set()
    for (n, l) in sorted(new_zcta_list.items()):
        new_cd_set = list_key_set(l, 'cd')
        old_cd_set = list_key_set(old_zcta_list[n], 'house_district')
        if old_cd_set.symmetric_difference(new_cd_set):
            # the district assignment for this zipcode changed; log it
            cds_changed += 1
            new_state = list_key_values(l, 'state_abbr')
            old_state = list_key_values(old_zcta_list[n], 'state')
            print "%s was %s-%s now %s-%s" % (n, ','.join(old_state), ','.join(old_cd_set), ','.join(new_state), ','.join(new_cd_set))
            states_changed.update(new_state.keys())
            states_changed.update(old_state.keys())
    print "CDs changed", cds_changed
    print "from states", states_changed
    print
float(place['lat'])] }, "properties": { "fips": place['fips'] } } if __name__ == "__main__": if len(sys.argv) > 1: fn = sys.argv[1] else: fn = relative_path("../raw/Gaz_places_national.txt") try: gazetteer = load_csv_columns(fn, GAZETTEER_COLUMNS, delimiter='\t', quoting=csv.QUOTE_NONE) except IOError: print "unable to load", fn sys.exit(-1) for (abbr, data) in split_dict_by(gazetteer, 'state').items(): state_name = STATE_ABBR[abbr].replace(' ', '_') geojson_collection = {"type": "FeatureCollection", "features": list()} for place in data: geojson_collection['features'].append(geojson_feature(place)) print "writing %d places in %s" % (len(data), state_name) out_fn = relative_path('../places/%s.geo.json' % state_name)
def append_missing_zips(zccd, states_list):
    """Append at-large ('0') district rows for zipcodes missing from zccd.

    Fills three kinds of gaps, mutating and returning ``zccd``:
      1. Every zipcode in the states/territories named by ``states_list``
         (assumed to have a single at-large district), sourced from the
         census zcta_county_rel file.
      2. US Minor and Outlying Island territories absent from that file,
         hard-coded from government websites.
      3. A handful of low-population but noteworthy zipcodes with known CDs.

    ``states_list`` is a list of two-letter state abbreviations.
    """
    # Hoisted to a set: membership is tested once per row of the (large)
    # relationship file below, and set lookup is O(1) vs O(n) for a list.
    states_fips = set(STATE_TO_FIPS[s] for s in states_list)

    # load zcta_county_rel, which has full entries for each state
    column_map = {
        'ZCTA5': 'zcta',
        'STATE': 'state_fips'
    }
    all_zips_list = utils.load_csv_columns('raw/zcta_county_rel_10.txt', column_map)

    # The relationship file has one row per (zcta, county) pair, so the same
    # (zcta, state) combination can repeat; track what we've already seen to
    # append each combination at most once.
    seen_states_by_zcta = collections.defaultdict(set)
    for z in all_zips_list:
        if z['state_fips'] in seen_states_by_zcta[z['zcta']]:
            log.info('zcta %s already in %s' % (z['zcta'], z['state_fips']))
            continue
        seen_states_by_zcta[z['zcta']].add(z['state_fips'])
        if z['state_fips'] in states_fips:
            # NOTE(review): unlike the island/small-zip rows appended below,
            # these rows carry no 'state_abbr' key -- presumably filled in
            # downstream from state_fips; confirm consumers handle this.
            zccd.append({
                'zcta': z['zcta'],
                'state_fips': z['state_fips'],
                'cd': '0'  # at-large
            })

    # also include zipcodes from US Minor and Outlying Islands
    # which are not included in the zcta_county_rel file
    # these are copied from govt websites as available
    missing_islands = {
        'AS': ['96799'],
        'GU': ['96910', '96913', '96915', '96916', '96917', '96921',
               '96928', '96929', '96931', '96932'],
        'MP': ['96950', '96951', '96952'],
        'VI': ['00801', '00802', '00820', '00823', '00824', '00830',
               '00831', '00841', '00840', '00850', '00851'],
        'PR': ['00981']  # not sure why this isn't in the country_rel, because there are a bunch of others listed
    }
    for (abbr, zcta_list) in missing_islands.items():
        for z in zcta_list:
            zccd.append({
                'zcta': z,
                'state_fips': STATE_TO_FIPS[abbr],
                'state_abbr': abbr,
                'cd': '0',  # at-large
            })

    # Include some zipcodes that have small populations (so no ZCTA) but are otherwise noteworthy
    # from https://about.usps.com/who-we-are/postal-facts/fun-facts.htm
    # There are ~2,500 others used exclusively by businesses, but we don't have a list.
    # Values are comma-separated CD lists: one row is appended per CD.
    missing_small_zips = {
        'AK': {
            '99950': '0',  # Ketchikan has highest zip
        },
        'AZ': {
            '85001': '7',  # Phoenix convention center
            '85002': '7'
        },
        'NY': {
            '00501': '1',  # Holtsville has IRS processing center with lowest zip
            '00544': '1',
            '11249': '7,12',  # Williamsburg split in 2011, not reflected in census
            '12301': '20',  # Schenectady has GE plant with memorable zip
            '12345': '20'
        },
        'TX': {
            '78599': '15'  # near US-Mexico border
        },
        'VA': {
            '22350': '8'  # Botanical preserve in Alexandria
        }
    }
    for (abbr, zcta_cd_dict) in missing_small_zips.items():
        for (z, cd_list) in zcta_cd_dict.items():
            for cd in cd_list.split(','):
                zccd.append({
                    'zcta': z,
                    'state_fips': STATE_TO_FIPS[abbr],
                    'state_abbr': abbr,
                    'cd': cd,
                })
    return zccd
"properties": { "city": d['city'], "other_cities": d['other_cities'], "state": d['state'], "county": d['county'], } } if __name__ == "__main__": if len(sys.argv) > 1: fn = sys.argv[1] else: fn = relative_path("../raw/zip_code_database.csv") try: zipcode_db = load_csv_columns(fn, ZIPCODE_COLUMNS) except IOError: print "unable to load", fn sys.exit(-1) print "loaded %s zipcodes" % len(zipcode_db) for (abbr, data) in split_dict_by(zipcode_db, 'state').items(): state_name = STATE_ABBR.get(abbr, '').replace(' ', '_') geojson_collection = {"type": "FeatureCollection", "features": list()} for place in data: geojson_collection['features'].append(geojson_feature(place)) print "writing %d places in %s" % (len(data), state_name)