Exemplo n.º 1
0
def dump_pop_weighted(states=None,
                      filebase='data/tl_2014_us_cd114',
                      states_filename='data/state.txt',
                      block_data_directory='data/faces/',
                      out_filename='data/convex_hull_pop.csv',
                      district_type='cd114'):
    if states is None:
        states = [state for state in get_states().keys() if int(state) < 60]
    districts = shapefile.Reader(filebase).shapeRecords()
    district_dict = collections.defaultdict(dict)
    for district in districts:
        district_dict[district.record[0]][district.record[1]] = district
    for state in states:
        data = []
        print "processing %s" % get_states()[state]
        bm = block_map(state, district_type, block_data_directory)
        ab = all_blocks(state, block_data_directory)
        br = block_reader(state, block_data_directory)
        bdict = {
            block: geo.shape(br(block).shape.__geo_interface__)
            for block in ab
        }
        for num in district_dict[state]:
            try:
                data.append(
                    (get_states()[state], num,
                     convex_hull_pop_weighted(district_dict[state][num],
                                              bm[num], ab, bdict, br)))
            except Exception as e:
                print "%s error in %s-%s" % (e, get_states()[state], num)
                raise e
        data.sort()
        with open(out_filename, 'a') as f:
            writer = csv.writer(f)
            writer.writerows(data)
Exemplo n.º 2
0
def dump_population_moments(states=None,
                            filebase='data/tl_2014_us_cd114',
                            states_filename='data/state.txt',
                            block_data_directory='/tmp/faces/',
                            out_filename='data/population_moments.csv',
                            district_type='cd114'):
    """Appends (state name, district number, population moment) rows to
    out_filename as CSV, one state at a time.

    Args:
      states: iterable of state FIPS code strings; defaults to every state
        with FIPS < 60 (territories like Puerto Rico lack tabblock data).
      filebase: base path of the districts shapefile.
      states_filename: kept for interface parity with the sibling dump
        functions.  TODO(review): confirm get_states reads it.
      block_data_directory: directory holding faces/tabblock files.
      out_filename: CSV appended to (mode 'a'), so reruns accumulate rows.
      district_type: key selecting the district column ('cd114'/'sldu'/'sldl').

    Errors from population_moment are logged and skipped (best-effort),
    unlike dump_pop_weighted which re-raises.
    """
    if states is None:
        # Filter out Puerto Rico etc. because there's no tabblock data for
        # them.
        states = [state for state in get_states().keys() if int(state) < 60]
    districts = shapefile.Reader(filebase).shapeRecords()
    # states -> dict of district number -> shapeRecord
    district_dict = collections.defaultdict(dict)
    for district in districts:
        district_dict[district.record[0]][district.record[1]] = district
    for state in states:
        data = []
        # Parenthesized print works identically on Python 2 and 3.
        print("processing %s" % get_states()[state])
        bm = block_map(state, district_type, block_data_directory)
        br = block_reader(state, block_data_directory)
        for num in district_dict[state]:
            try:
                data.append((get_states()[state], num,
                             population_moment(district_dict[state][num],
                                               bm[num], br)))
            except Exception as e:
                # Deliberate best-effort: log and continue with other
                # districts rather than aborting the whole state.
                print("%s error in %s-%s" % (e, get_states()[state], num))
        data.sort()
        with open(out_filename, 'a') as f:
            writer = csv.writer(f)
            writer.writerows(data)
Exemplo n.º 3
0
def block_map(state_fips, district_type, directory='/tmp/faces/'):
    """Builds a district -> list-of-block-codes map from the faces dbf files.

    Args:
      state_fips: state FIPS code string used to select the faces files.
      district_type: one of 'cd114', 'sldu', 'sldl' — chooses which record
        column holds the district id.
      directory: directory containing tl_2014_<fips>*_faces.dbf files.

    Returns:
      defaultdict mapping district id -> list of 15-char block codes.

    Raises:
      ValueError: if no matching faces files exist in directory.
      KeyError: if district_type is not one of the known keys.
    """
    # Column index of the district id for each supported district type.
    record_index = {'cd114': 30, 'sldu': 31, 'sldl': 32}[district_type]
    all_files = os.listdir(directory)
    faces_files = [
        f for f in all_files
        if f.startswith('tl_2014_%s' % state_fips) and f.endswith('_faces.dbf')
    ]
    if not faces_files:
        raise ValueError("No files found")
    b_map = collections.defaultdict(list)
    for f in faces_files:
        # os.path.join is robust to a directory argument that lacks a
        # trailing slash (plain concatenation was not).
        with open(os.path.join(directory, f), 'rb') as dbf:
            sf = shapefile.Reader(dbf=dbf)
            for rec in sf.records():
                # Block code = state + county + tract fields plus the block
                # field.  TODO(review): confirm column layout against the
                # TIGER faces schema.
                block_code = ''.join(rec[1:4]) + rec[5]
                cd = rec[record_index]
                b_map[block_code].append(cd)
    cd_map = collections.defaultdict(list)
    for block_code in b_map:
        # Sometimes blocks get split; just take the district that most of the
        # pieces are in.  This isn't perfect but it's a small difference.
        district_options = collections.Counter(b_map[block_code])
        cd = district_options.most_common(1)[0][0]
        cd_map[cd].append(block_code)
    return cd_map
Exemplo n.º 4
0
def dump_data(metric,
              filebase='data/tl_2014_us_cd114',
              states_filename='data/state.txt',
              out_filename=None):
    """Dumps a csv of state,district,metric.

    Notes:
      * States with at-large elections (i.e. a single rep) are denoted by
        district 00
      * Non-states with nonvoting reps (DC, PR, etc.) are listed and denoted by
        district 98 or 99

    Args:
      metric: callable taking a shape and returning the value to dump.
      filebase: base path of the districts shapefile.
      states_filename: path passed to get_states for FIPS -> name mapping.
      out_filename: output CSV path; defaults to
        '<filebase>_<metric name>.csv'.
    """
    sf = shapefile.Reader(filebase)
    districts = sf.shapeRecords()
    states = get_states(states_filename)
    data = []
    # Explicit loop (instead of a comprehension) keeps 'dist' bound in the
    # error handler — comprehension variables don't leak on Python 3.
    for dist in districts:
        # 'ZZ...'-numbered records are placeholders; skip them.
        if set(dist.record[1]) == set('Z'):
            continue
        try:
            data.append((states[dist.record[0]], dist.record[1],
                         metric(dist.shape)))
        except Exception:
            # Identify the offending record, then re-raise with the
            # original traceback.  (Narrowed from a bare except so
            # KeyboardInterrupt/SystemExit still propagate untouched.)
            print(dist.record)
            raise
    data.sort()
    if out_filename is None:
        # __name__ works on both Python 2 and 3; func_name was 2-only.
        out_filename = filebase + '_' + metric.__name__ + '.csv'
    with open(out_filename, 'w') as f:
        writer = csv.writer(f)
        writer.writerows(data)
Exemplo n.º 5
0
def block_reader(state_fips, directory='data/faces/'):
    """Returns a function mapping block id -> shapeRecord (or None if the
    block is absent from the state's pophu shapefile)."""
    sf = shapefile.Reader(directory + 'tabblock2010_%s_pophu.shp' % state_fips)
    # Build block-id -> record-index lookup once, up front.
    index_of = {}
    for position, record in enumerate(sf.records()):
        index_of[record[4]] = position

    def reader(block_id):
        position = index_of.get(block_id)
        if position is None:
            print("block %s not found" % block_id)
            # For some reason, a few blocks don't show up in the population
            # data.  Let's not worry about it.
            return None
        return sf.shapeRecord(position)

    return reader
Exemplo n.º 6
0
def all_blocks(state_fips, directory='data/faces/'):
    """Returns every block id (record column 4) in the state's pophu
    shapefile."""
    reader = shapefile.Reader(
        directory + 'tabblock2010_%s_pophu.shp' % state_fips)
    return [rec[4] for rec in reader.records()]