Python load_csv_columns示例，utils.load_csv_columns Python示例

示例#1

0

显示文件

文件： merge_data.py 项目： vandeng86/us-zipcodes-congress

def load_zccd(fn):
    """Load a ZCTA / congressional-district CSV through ``utils.load_csv_columns``.

    The source column headers are renamed to the short field names used by
    the rest of the merge script. ``skip=1`` is forwarded unchanged to the
    loader.

    Args:
        fn: path of the CSV file to read.

    Returns:
        Whatever ``utils.load_csv_columns`` returns for the mapped columns.
    """
    header_to_field = {
        'State': 'state_fips',
        'ZCTA': 'zcta',
        # the national and state-specific files spell this header differently
        'Congressional District': 'cd',
        'CongressionalDistrict': 'cd',
    }
    return utils.load_csv_columns(fn, header_to_field, skip=1)

示例#2

0

显示文件

文件： merge_data.py 项目： vandeng86/us-zipcodes-congress

def load_fips(fn):
    """Build a lookup from state FIPS code to state abbreviation.

    Reads the pipe-delimited file ``fn`` via ``utils.load_csv_columns``,
    keeping the ``STATE`` and ``STUSAB`` columns.

    Args:
        fn: path of the pipe-delimited FIPS file.

    Returns:
        dict mapping each row's ``state_fips`` value to its ``state`` value;
        if a FIPS code appears more than once, the last row wins.
    """
    header_to_field = {'STATE': 'state_fips', 'STUSAB': 'state'}
    rows = utils.load_csv_columns(fn, header_to_field, delimiter='|')
    return dict((entry['state_fips'], entry['state']) for entry in rows)

示例#3

0

显示文件

文件： test.py 项目： zqh375/us-zipcodes-congress

def test_hud():
    print "\t us, hud"

    our_data = load_csv_columns('zccd.csv')
    hud_data = load_csv_columns('zccd_hud.csv')
    print "length", len(our_data), len(hud_data)
    print

    our_states = list_key_set(our_data, 'state_abbr')
    hud_states = list_key_set(hud_data, 'state_abbr')
    print "states", len(our_states), len(hud_states)
    assert len(hud_states.difference(our_states)) == 0
    print "we added", our_states.difference(hud_states)
    print

    our_zctas = list_key_set(our_data, 'zcta')
    hud_zips = list_key_set(hud_data, 'zip')
    print "ZCTAs", len(our_zctas), len(hud_zips)
    # we should not be missing any hud zctas
    print "we added", len(our_zctas.difference(hud_zips))
    print "missing", len(hud_zips.difference(our_zctas))
    print sorted(list(hud_zips.difference(our_zctas)))
    print

    our_zcta_list = list_key_values(our_data, 'zcta')
    hud_zip_list = list_key_values(hud_data, 'zip')
    cds_changed = 0
    states_changed = set()
    for (n, l) in sorted(our_zcta_list.items()):
        our_cd_set = list_key_set(l, 'cd')
        hud_cd_set = list_key_set(hud_zip_list[n], 'cd')
        if hud_cd_set.symmetric_difference(our_cd_set):
            cds_changed += 1
            our_state = list_key_values(l, 'state_abbr')
            hud_state = list_key_values(hud_zip_list[n], 'state_abbr')
            #print "%s in %s-%s hud %s-%s" % (n, ','.join(our_state), ','.join(our_cd_set), ','.join(hud_state), ','.join(hud_cd_set))
            states_changed.update(our_state.keys())
            states_changed.update(hud_state.keys())

    print "CDs differing", cds_changed
    print "from states", states_changed
    print

示例#4

0

显示文件

文件： test.py 项目： zqh375/us-zipcodes-congress

def test_sunlight():
    print "\t new, old"

    new_data = load_csv_columns('zccd.csv')
    old_data = load_csv_columns('raw/old_sunlight_districts.csv')
    print "length", len(new_data), len(old_data)
    print

    new_states = list_key_set(new_data, 'state_abbr')
    old_states = list_key_set(old_data, 'state')
    print "states", len(new_states), len(old_states)
    assert len(old_states.difference(new_states)) == 0
    assert len(new_states.difference(old_states)) == 0
    print

    new_zctas = list_key_set(new_data, 'zcta')
    old_zctas = list_key_set(old_data, 'zipcode')
    print "ZCTAs", len(new_zctas), len(old_zctas)
    # we should not be missing any old zctas
    assert len(old_zctas.difference(new_zctas)) == 0
    print "added", new_zctas.difference(old_zctas)
    print

    new_zcta_list = list_key_values(new_data, 'zcta')
    old_zcta_list = list_key_values(old_data, 'zipcode')
    cds_changed = 0
    states_changed = set()
    for (n, l) in sorted(new_zcta_list.items()):
        new_cd_set = list_key_set(l, 'cd')
        old_cd_set = list_key_set(old_zcta_list[n], 'house_district')
        if old_cd_set.symmetric_difference(new_cd_set):
            cds_changed += 1
            new_state = list_key_values(l, 'state_abbr')
            old_state = list_key_values(old_zcta_list[n], 'state')
            print "%s was %s-%s now %s-%s" % (n, ','.join(old_state), ','.join(
                old_cd_set), ','.join(new_state), ','.join(new_cd_set))
            states_changed.update(new_state.keys())
            states_changed.update(old_state.keys())

    print "CDs changed", cds_changed
    print "from states", states_changed
    print

示例#5

0

显示文件

                            float(place['lat'])]
        },
        "properties": {
            "fips": place['fips']
        }
    }


# Script entry point: load the gazetteer places file and build one GeoJSON
# FeatureCollection per state.
# NOTE(review): this excerpt ends right after the output path is computed; the
# code that presumably writes the collection to disk is not visible here.
if __name__ == "__main__":
    # a path given on the command line overrides the default raw data file
    if len(sys.argv) > 1:
        fn = sys.argv[1]
    else:
        fn = relative_path("../raw/Gaz_places_national.txt")
    try:
        # the input is tab-delimited; csv.QUOTE_NONE is passed through to the
        # loader (presumably forwarded to the csv reader so that quote
        # characters are treated as ordinary data -- confirm in load_csv_columns)
        gazetteer = load_csv_columns(fn,
                                     GAZETTEER_COLUMNS,
                                     delimiter='\t',
                                     quoting=csv.QUOTE_NONE)
    except IOError:
        # report the unreadable file and exit with a non-zero status
        print "unable to load", fn
        sys.exit(-1)

    # group the places by their 'state' field and handle each state separately
    for (abbr, data) in split_dict_by(gazetteer, 'state').items():
        # spaces become underscores so the name can be used in a file name;
        # an abbreviation missing from STATE_ABBR raises KeyError here
        state_name = STATE_ABBR[abbr].replace(' ', '_')

        geojson_collection = {"type": "FeatureCollection", "features": list()}

        # one GeoJSON feature per place
        for place in data:
            geojson_collection['features'].append(geojson_feature(place))

        print "writing %d places in %s" % (len(data), state_name)
        out_fn = relative_path('../places/%s.geo.json' % state_name)

示例#6

0

显示文件

文件： merge_data.py 项目： vandeng86/us-zipcodes-congress

def append_missing_zips(zccd, states_list):
    """Append zipcodes that the district files omit, and return ``zccd``.

    Rows are appended to ``zccd`` in place, in three passes:

    1. Every distinct (zcta, state FIPS) pair found in
       ``raw/zcta_county_rel_10.txt`` whose state is one of ``states_list``,
       with district ``'0'`` (at-large). These rows carry no ``state_abbr``
       key, unlike the rows added by the next two passes.
    2. A hard-coded list of island-territory zipcodes, district ``'0'``.
    3. A hard-coded list of noteworthy zipcodes that have no ZCTA, one row
       per listed congressional district.

    Args:
        zccd: list of row dicts; it is mutated in place.
        states_list: iterable of state abbreviations, each of which must be
            a key of ``STATE_TO_FIPS``.

    Returns:
        The same ``zccd`` list that was passed in.

    Raises:
        KeyError: if an abbreviation is not present in ``STATE_TO_FIPS``.
    """
    # Built once as a set: it is probed for every row of the relationship
    # file below, so membership should not be a linear list scan.
    at_large_fips = set(STATE_TO_FIPS[s] for s in states_list)

    # load zcta_county_rel, which has full entries for each state
    column_map = {
        'ZCTA5': 'zcta',
        'STATE': 'state_fips'
    }
    all_zips_list = utils.load_csv_columns('raw/zcta_county_rel_10.txt', column_map)

    # zcta -> set of state FIPS codes already processed; used to drop the
    # repeated (zcta, state) pairs that occur in the relationship file
    seen_states_by_zcta = collections.defaultdict(set)

    for z in all_zips_list:
        zcta = z['zcta']
        state_fips = z['state_fips']

        seen_states = seen_states_by_zcta[zcta]
        if state_fips in seen_states:
            log.info('zcta %s already in %s' % (zcta, state_fips))
            continue
        seen_states.add(state_fips)

        if state_fips in at_large_fips:
            zccd.append({
                'zcta': zcta,
                'state_fips': state_fips,
                'cd': '0' # at-large
            })

    # also include zipcodes from US Minor and Outlying Islands
    # which are not included in the zcta_county_rel file
    # these are copied from govt websites as available
    missing_islands = {
        'AS': ['96799'],
        'GU': ['96910', '96913', '96915', '96916', '96917', '96921', '96928', '96929', '96931', '96932'],
        'MP': ['96950', '96951', '96952'],
        'VI': ['00801', '00802', '00820', '00823', '00824', '00830', '00831','00841', '00840', '00850', '00851'],
        'PR': ['00981'] # not sure why this isn't in the country_rel, because there are a bunch of others listed
    }

    for (abbr, zcta_list) in missing_islands.items():
        for z in zcta_list:
            zccd.append({
                    'zcta': z,
                    'state_fips': STATE_TO_FIPS[abbr],
                    'state_abbr': abbr,
                    'cd': '0', # at-large
                })

    # Include some zipcodes that have small populations (so no ZCTA) but are otherwise noteworthy
    # from https://about.usps.com/who-we-are/postal-facts/fun-facts.htm
    # There are ~2,500 others used exclusively by businesses, but we don't have a list.
    # Values are comma-separated district numbers; a zipcode split across
    # districts produces one row per district.
    missing_small_zips = {
        'AK': {
            '99950': '0', # Ketchikan has highest zip
        },
        'AZ': {
            '85001': '7', # Phoenix convention center
            '85002': '7'  #
        },
        'NY': {
            '00501': '1', # Holtsville has IRS processing center with lowest zip
            '00544': '1', #
            '11249': '7,12', # Williamsburg split in 2011, not reflected in census
            '12301': '20', # Schenectady has GE plant with memorable zip
            '12345': '20'
        },
        'TX': {
            '78599': '15' # near US-Mexico border
        },
        'VA': {
            '22350': '8' # Botanical preserve in Alexandria
        }
    }

    for (abbr, zcta_cd_dict) in missing_small_zips.items():
        for (z, cd_list) in zcta_cd_dict.items():
            for cd in cd_list.split(','):
                zccd.append({
                        'zcta': z,
                        'state_fips': STATE_TO_FIPS[abbr],
                        'state_abbr': abbr,
                        'cd': cd,
                    })

    return zccd

示例#7

0

显示文件

        "properties": {
            "city": d['city'],
            "other_cities": d['other_cities'],
            "state": d['state'],
            "county": d['county'],
        }
    }


# Script entry point: load the zipcode database and build one GeoJSON
# FeatureCollection per state.
# NOTE(review): this excerpt ends at the progress message; whatever follows
# (presumably writing the collection to disk) is not visible here.
if __name__ == "__main__":
    # a path given on the command line overrides the default raw data file
    if len(sys.argv) > 1:
        fn = sys.argv[1]
    else:
        fn = relative_path("../raw/zip_code_database.csv")
    try:
        zipcode_db = load_csv_columns(fn, ZIPCODE_COLUMNS)
    except IOError:
        # report the unreadable file and exit with a non-zero status
        print "unable to load", fn
        sys.exit(-1)

    print "loaded %s zipcodes" % len(zipcode_db)

    # group the records by their 'state' field and handle each state separately
    for (abbr, data) in split_dict_by(zipcode_db, 'state').items():
        # abbreviations missing from STATE_ABBR yield an empty name rather
        # than raising; spaces become underscores
        state_name = STATE_ABBR.get(abbr, '').replace(' ', '_')

        geojson_collection = {"type": "FeatureCollection", "features": list()}

        # one GeoJSON feature per record
        for place in data:
            geojson_collection['features'].append(geojson_feature(place))

        print "writing %d places in %s" % (len(data), state_name)