Пример #1
0
 def test_geographies_county(self):
     """Check the Hawaii county listings for the 2015 ACS 5-year and 1-year data.

     The 5-year listing is expected to contain five counties; the 1-year
     listing is expected to contain the same set minus Kalawao County.
     """
     def hawaii(county):
         # censusgeo for one county (or '*' for all) inside state '15'.
         return censusdata.censusgeo([('state', '15'), ('county', county)])

     # ACS 5-year: all five counties.
     acs5_result = censusdata.geographies(hawaii('*'), 'acs5', 2015)
     acs5_expected = {
         'Hawaii County, Hawaii': hawaii('001'),
         'Honolulu County, Hawaii': hawaii('003'),
         'Kalawao County, Hawaii': hawaii('005'),
         'Kauai County, Hawaii': hawaii('007'),
         'Maui County, Hawaii': hawaii('009'),
     }
     self.assertEqual(acs5_result, acs5_expected)

     # ACS 1-year: Kalawao County is not part of the expected listing.
     acs1_result = censusdata.geographies(hawaii('*'), 'acs1', 2015)
     acs1_expected = {
         'Hawaii County, Hawaii': hawaii('001'),
         'Honolulu County, Hawaii': hawaii('003'),
         'Kauai County, Hawaii': hawaii('007'),
         'Maui County, Hawaii': hawaii('009'),
     }
     self.assertEqual(acs1_result, acs1_expected)
Пример #2
0
def get_censusgeos(geo_level):
    """
    Builds the list of censusgeo objects for the requested geography level.

    Reads the module-level ``state_names``, ``county_names``, ``data_source``,
    ``year`` and ``API_KEY`` values in addition to its argument.

    :param:
        geo_level (geoLevel object): object exposing ``name`` (e.g. 'us',
            'block', 'block group') and a numeric ``level``; levels >= 1
            expand to counties, >= 2 to tracts, >= 3 to ``geo_level.name``
    :return:
        a list of censusgeo objects
    """

    # nationwide request, otherwise start from the configured states
    if geo_level.name == 'us':
        final_geo = _get_geo('us')
    else:
        state_geos = _get_geo('state', state_names)
        final_geo = state_geos

    # expand every state into its counties
    if geo_level.level >= 1:
        county_geos = []
        for idx, state_name in enumerate(state_names):
            if county_names is None or county_names[idx] is None:
                qualified_names = None
            else:
                # geography names are keyed as "<county>, <state>"
                qualified_names = [
                    cn+", "+state_name for cn in county_names[idx]
                ]
            found = _get_geo('county', qualified_names,
                             list(state_geos[idx].params()))

            # Census API doesn't support using wildcards for 'county' for
            # lower levels of hierarchy, so resolve the wildcard into the
            # concrete county geographies here
            if geo_level.level > 1 and qualified_names is None:
                resolved = censusdata.geographies(found[0], data_source, year,
                                                  key=API_KEY)
                found = resolved.values()

            county_geos += found
        final_geo = county_geos

    # This is deliberately more general than needed so that specific tracts
    # and blocks could be supported later.
    for level in [2,3]:
        if geo_level.level < level:
            break
        # level 2 is always tracts; level 3 is 'block' or 'block group'
        child_name = 'tract' if level == 2 else geo_level.name
        expanded = []
        for parent in final_geo:
            expanded += _get_geo(child_name, None, list(parent.params()))
        final_geo = expanded

    return final_geo
def GetStateNum(state_name):
    """Return the state FIPS code for ``state_name`` from the 2019 ACS 5-year listing.

    The code is taken as the text after the last ':' in the string form of
    the matching geography object.
    """
    every_state = cd.censusgeo([('state', "*")])
    states_by_name = cd.geographies(every_state, 'acs5', 2019)
    _, _, fips = str(states_by_name[state_name]).rpartition(":")
    return fips
Пример #4
0
 def test_geographies_state(self):
     """Check the ACS 5-year state listing for every year from 2009 through 2018.

     The expected listing holds the 50 states, the District of Columbia and
     Puerto Rico, each mapped to its single-level state geography.
     """
     # Expected state name -> state code, ordered by code.
     code_by_name = {
         'Alabama': '01',
         'Alaska': '02',
         'Arizona': '04',
         'Arkansas': '05',
         'California': '06',
         'Colorado': '08',
         'Connecticut': '09',
         'Delaware': '10',
         'District of Columbia': '11',
         'Florida': '12',
         'Georgia': '13',
         'Hawaii': '15',
         'Idaho': '16',
         'Illinois': '17',
         'Indiana': '18',
         'Iowa': '19',
         'Kansas': '20',
         'Kentucky': '21',
         'Louisiana': '22',
         'Maine': '23',
         'Maryland': '24',
         'Massachusetts': '25',
         'Michigan': '26',
         'Minnesota': '27',
         'Mississippi': '28',
         'Missouri': '29',
         'Montana': '30',
         'Nebraska': '31',
         'Nevada': '32',
         'New Hampshire': '33',
         'New Jersey': '34',
         'New Mexico': '35',
         'New York': '36',
         'North Carolina': '37',
         'North Dakota': '38',
         'Ohio': '39',
         'Oklahoma': '40',
         'Oregon': '41',
         'Pennsylvania': '42',
         'Rhode Island': '44',
         'South Carolina': '45',
         'South Dakota': '46',
         'Tennessee': '47',
         'Texas': '48',
         'Utah': '49',
         'Vermont': '50',
         'Virginia': '51',
         'Washington': '53',
         'West Virginia': '54',
         'Wisconsin': '55',
         'Wyoming': '56',
         'Puerto Rico': '72',
     }
     expected = {
         name: censusdata.censusgeo([('state', code)])
         for name, code in code_by_name.items()
     }
     for year in range(2009, 2019):
         listing = censusdata.geographies(
             censusdata.censusgeo([('state', '*')]), 'acs5', year)
         self.assertEqual(listing, expected)
Пример #5
0
def getState(state, year, key):
    """Return the ``geo`` attribute of ``state`` from the ACS 5-year state listing.

    ``year`` selects the ACS vintage and ``key`` is passed through as the
    API key. Raises ``KeyError`` when ``state`` is not a name in the listing.
    """
    state_wildcard = censusdata.censusgeo([('state', '*')])
    states_by_name = censusdata.geographies(
        state_wildcard, 'acs5', year, key=key)
    matched = states_by_name[state]
    return matched.geo
Пример #6
0
def _get_geo(geotype, names=None, higher_list=None):
    """
    Helper that produces the geographies one level below a given hierarchy.

    Uses the module-level ``data_source``, ``year`` and ``API_KEY`` when a
    name lookup is required.

    :param:
        geotype (string) : name of the geography level (e.g. 'state')
        names (list of string) : names of the specific locations wanted,
            or None to get a single wildcard geography covering all of them
        higher_list (list of tuples of string):
            the (level, value) pairs of the enclosing hierarchy
            (e.g. if 'geotype' is 'tract', then this could be
            [('state', 'Pennsylvania'), ('county', 'York County')] )

    :return:
        list of censusgeo objects: one wildcard object when ``names`` is
        None, otherwise one object per entry of ``names`` in the same order
    """
    parents = [] if higher_list is None else higher_list
    wildcard = censusdata.censusgeo(parents + [(geotype, '*')])

    # No specific names requested: the wildcard geography is the answer.
    if names is None:
        return [wildcard]

    # Resolve the wildcard into named geographies and pick the requested ones.
    by_name = censusdata.geographies(wildcard, data_source, year, key=API_KEY)
    return [by_name[name] for name in names]
Пример #7
0
def getAllCounties(state, year, key):
    """Return the ACS 5-year county listing for ``state``.

    The state is first resolved with ``getState``; its hierarchy is then
    extended with a county wildcard and passed to ``censusdata.geographies``.
    """
    hierarchy = list(getState(state, year, key))
    hierarchy.append(('county', '*'))
    county_query = censusdata.censusgeo(hierarchy)
    return censusdata.geographies(county_query, 'acs5', year, key=key)
Пример #8
0
def county_list(state_number):
    """Return the county codes of ``state_number`` from the 2018 ACS 5-year listing.

    Each code is read from the second (county) entry of the geography's
    ``geo`` hierarchy.
    """
    query = censusdata.censusgeo([('state', state_number), ('county', '*')])
    geos_by_name = censusdata.geographies(query, 'acs5', 2018)
    return [county_geo.geo[1][1] for county_geo in geos_by_name.values()]
Пример #9
0
def GetStateNum(state_name, year):
    """Return the state code for ``state_name`` in the ACS 5-year listing of ``year``.

    ``year`` is converted with ``int``. The code is the text after the last
    ':' in the string form of the matching geography object.
    """
    every_state = cd.censusgeo([('state', "*")])
    states_by_name = cd.geographies(every_state, 'acs5', int(year))
    _, _, fips = str(states_by_name[state_name]).rpartition(":")
    return fips
Пример #10
0
def fetch_metro_area_code(metro_area, survey=survey, year=year):
    """Return the code of the first metro/micro area whose name starts with ``metro_area``.

    The code is the trailing run of digits in the string form of the matching
    geography. Returns ``None`` when no area name has that prefix.
    """
    query = censusdata.censusgeo([
        ('metropolitan statistical area/micropolitan statistical area', '*')
    ])
    areas_by_name = censusdata.geographies(query, survey, year,
                                           key=census_api_key)
    first_match = next(
        (area_geo for label, area_geo in areas_by_name.items()
         if label.startswith(metro_area)),
        None)
    if first_match is None:
        return None
    return re.search(r'\d+$', str(first_match)).group()
Пример #11
0
def get_acs_5_data(year, state, data_aliases):
    '''
    Get American Community Survey 5-year data at census tract level

    The counties of ``state`` are listed first, then the tract-level data is
    downloaded one county at a time and stacked into a single DataFrame.

    Inputs:
        year (integer): year from which to pull data
        state (string): encoding of state for which to pull data
        data_aliases (dictionary; keys and values both strings): mapping of
            encoded data columns to pull from ACS with their descriptive names.
            Note that these descriptive names will be the headers of the output
            DataFrame. See below links for 2018 column encodings:
            https://api.census.gov/data/2018/acs/acs5/variables.html
            https://api.census.gov/data/2018/acs/acs1/variables.html
            https://api.census.gov/data/2018/acs/acsse/variables.html

    (For more information on Census geographies, please visit this link:
        https://www.census.gov/data/developers/geography.html)

    Output:
        A pandas dataframe with ACS data, one row per tract, with the renamed
        data columns plus a 'year' column
    '''
    # Initialize an empty frame that fixes the leading column order
    if data_aliases:
        results_df = pd.DataFrame(columns=data_aliases.values())
    else:
        # NOTE(review): data_columns is not defined in this function; it is
        # presumably a module-level default - confirm before relying on it.
        results_df = pd.DataFrame(columns=data_columns)

    results_df['year'] = ""

    # List every county of the state
    geographies = censusdata.geographies(
        censusdata.censusgeo([('state', state), ('county', '*')]), 'acs5',
        year)

    # Collect the per-county frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0.
    frames = [results_df]
    for county_geo in geographies.values():
        ((_, _), (_, county_code)) = county_geo.params()

        # NOTE(review): the API key is hard-coded here; consider moving it to
        # configuration.
        df = censusdata.download(
            "acs5",
            year,
            censusdata.censusgeo([("state", state), ("county", county_code),
                                  ("tract", "*")]),
            list(data_aliases.keys()),
            key="e62f1cebce1c8d3afece25fc491fbec7271a588b").reset_index()

        df = df.rename(columns=data_aliases)
        df['year'] = year
        frames.append(df)

    results_df = pd.concat(frames, ignore_index=True)

    # The seed frame is object-typed; let pandas pick proper dtypes
    results_df = results_df.infer_objects()

    return results_df
Пример #12
0
def fetch_tract_codes_by_county(state_code,
                                county_code,
                                survey=survey,
                                year=year):
    """Return ``(tract name, tract code)`` pairs for every tract of one county.

    The tract code is the trailing run of digits in the string form of each
    geography returned for the state/county/tract-wildcard query.
    """
    query = censusdata.censusgeo([('state', state_code),
                                  ('county', county_code), ('tract', '*')])
    tracts_by_name = censusdata.geographies(query, survey, year,
                                            key=census_api_key)
    return [(tract_name, re.search(r'\d+$', str(tract_geo)).group())
            for tract_name, tract_geo in tracts_by_name.items()]
Пример #13
0
def getAllCounties():
    """Download 2018 ACS 5-year county data for every state into one DataFrame.

    For each state in the state listing, the subject-table variables
    (keys of the module-level ``subject_tables``) and the detail-table
    variables (keys of ``detail_tables``) are downloaded for all counties,
    their 'index' column is converted with ``getFips``, and the two tables
    are merged. The per-state results are stacked and the columns are
    renamed to the module-level ``columns`` list.

    Returns:
        pandas.DataFrame with one row per county across all states.
    """
    states = censusdata.geographies(censusdata.censusgeo([('state', '*')]),
                                    'acs5', 2018)

    per_state_frames = []

    # For every state, get all counties
    for state in states:
        print("getting: ", state)
        state_fips = states[state].geo[0][1]

        # The county listing itself is not needed: download() with a county
        # wildcard already returns one row per county.
        subject_data = censusdata.download(
            'acs5', 2018,
            censusdata.censusgeo([('state', state_fips), ('county', '*')]),
            list(subject_tables.keys()),
            tabletype='subject').reset_index()
        detail_data = censusdata.download(
            'acs5', 2018,
            censusdata.censusgeo([('state', state_fips), ('county', '*')]),
            list(detail_tables.keys())).reset_index()

        # Get correct fips for index
        subject_data['index'] = subject_data['index'].map(getFips)
        detail_data['index'] = detail_data['index'].map(getFips)

        # Join the tables (on their shared columns) and keep for stacking
        per_state_frames.append(detail_data.merge(subject_data))

    # Concatenate once instead of growing the frame inside the loop
    all_states = (pd.concat(per_state_frames)
                  if per_state_frames else pd.DataFrame())

    # Set column names to human readable names. Plain assignment is used
    # because set_axis(..., inplace=True) was removed in pandas 2.0.
    all_states.columns = columns
    return all_states
Пример #14
0
def fetch_county_code(county, state=state, survey=survey, year=year):
    """Return the county code for ``county`` in ``state``.

    The nationwide county listing is fetched and looked up first under
    "<county> County, <state>" and, if that name is absent, under
    "<county>, <state>". The code is the trailing run of digits in the
    string form of the matching geography.

    Raises:
        KeyError: if neither spelling is present in the listing.
    """
    geo = censusdata.censusgeo([('county', '*')])
    response = censusdata.geographies(geo, survey, year, key=census_api_key)

    try:
        county_geo = response[f"{county} County, {state}"]
    except KeyError:
        # Only a missing name should trigger the fallback spelling;
        # presumably this covers names that already carry their own suffix.
        county_geo = response[f"{county}, {state}"]

    county_code = re.search(r"\d+$", str(county_geo))
    return county_code.group()
def GetCountyNums(state_name, counties):
    """Returns a list of county FIPS codes for a list of counties in a particular state

    Each entry of ``counties`` has surrounding single quotes stripped and is
    looked up as "<county>, <state_name>" in the 2019 ACS 5-year county
    listing of the state. The code is the text after the last ':' in the
    string form of the matching geography.
    """

    state_num = GetStateNum(state_name)

    # The geography level is spelled in lowercase here, matching every other
    # query in this file (it was "County").
    countygeo = cd.geographies(
        cd.censusgeo([('state', state_num), ('county', '*')]), 'acs5', 2019)

    county_list = [
        str(countygeo[c.strip("'") + ", " + state_name]).split(":")[-1]
        for c in counties
    ]

    return county_list
Пример #16
0
def GetAllCounties(state_name, year):
    """Return the county codes of ``state_name`` from the ACS 5-year listing of ``year``.

    Each code is the text after the last ':' in the string form of a county
    geography, with surrounding single quotes stripped.
    """
    state_code = GetStateNum(state_name, year)
    county_query = cd.censusgeo([('state', state_code), ('county', '*')])
    geos_by_name = cd.geographies(county_query, 'acs5', int(year))

    codes = []
    for county_geo in geos_by_name.values():
        _, _, tail = str(county_geo).rpartition(":")
        codes.append(tail.strip("'"))
    return codes
Пример #17
0
def GetCountyNums(state_name, counties, year):
    """Return the county codes for the named ``counties`` of ``state_name``.

    Each county name has surrounding single quotes stripped and is looked up
    as "<county>, <state_name>" in the ACS 5-year county listing of ``year``.
    The code is the text after the last ':' in the geography's string form.
    """
    state_code = GetStateNum(state_name, year)
    county_query = cd.censusgeo([('state', state_code), ("county", '*')])
    geos_by_name = cd.geographies(county_query, 'acs5', int(year))

    codes = []
    for raw_name in counties:
        full_name = raw_name.strip("'") + ", " + state_name
        _, _, code = str(geos_by_name[full_name]).rpartition(":")
        codes.append(code)
    return codes
Пример #18
0
def build_states_cache(db_name):
    """Rebuild the STATES and COUNTIES lookup tables in a SQLite database.

    Both tables are dropped and recreated, then filled from the 2018 ACS
    5-year state listing and one county listing per state.

    Args:
        db_name: path of the SQLite database file to open.
    """
    conn = sqlite3.connect(db_name)
    try:
        c = conn.cursor()

        c.executescript('''
            DROP TABLE IF EXISTS STATES;
            DROP TABLE IF EXISTS COUNTIES;
            CREATE TABLE STATES (state text, state_fips text);
            CREATE TABLE COUNTIES (state text, county text, county_fips text);
        ''')

        states = censusdata.geographies(
            censusdata.censusgeo([('state', '*')]), 'acs5', 2018)

        # Placeholders instead of string formatting, so a name containing a
        # quote cannot break (or alter) the statement.
        c.executemany(
            'INSERT INTO states VALUES (?, ?)',
            [(state, state_geo.params()[0][1])
             for state, state_geo in states.items()])

        for state_name, state_geo in tqdm(states.items(),
                                          desc='Building counties cache'):
            counties = censusdata.geographies(
                censusdata.censusgeo([state_geo.geo[0], ('county', '*')]),
                'acs5', 2018)

            for county, county_geo in counties.items():
                # extracting county name from string formatted as
                # "county, state"
                county_name = county.split(',')[0]
                county_fips = county_geo.geo[1][1]

                c.execute('INSERT INTO counties VALUES (?, ?, ?)',
                          (state_name, county_name, county_fips))

        conn.commit()
    finally:
        # Release the database handle whether or not the build succeeded.
        conn.close()
Пример #19
0
def fetch_state_code(state, survey=survey, year=year):
    """Return the state code for ``state``.

    The code is the trailing run of digits in the string form of the
    geography found under ``state`` in the state listing.
    """
    every_state = censusdata.censusgeo([('state', '*')])
    states_by_name = censusdata.geographies(every_state, survey, year,
                                            key=census_api_key)
    digits = re.search(r"\d+$", str(states_by_name[state]))
    return digits.group()
Пример #20
0
    union.columns = col_names

    union = union.apply(pd.to_numeric)

    union.replace({-666666666: None}, inplace=True)

    return union


# Resolve the metro area and the two state codes used below.
# NOTE(review): `state` is a module-level value not shown here; the MO_
# prefix suggests it is "Missouri" - confirm.
metro_code = fetch_metro_area_code(metro_area)
MO_state_code = fetch_state_code(state)
IL_state_code = fetch_state_code(state="Illinois")

# County listings (name -> geography) for each state.
geo = censusdata.censusgeo([('state', MO_state_code), ('county', '*')])
MO_counties = censusdata.geographies(geo, survey, year, key=census_api_key)

geo = censusdata.censusgeo([('state', IL_state_code), ('county', '*')])
IL_counties = censusdata.geographies(geo, survey, year, key=census_api_key)

# NOTE(review): the two assignments below are commented out, yet
# MO_metro_county_codes / IL_metro_county_codes are used right after. Unless
# they are defined elsewhere in the file, the comprehensions below raise
# NameError.
# MO_metro_county_codes = [fetch_county_code(county) for county in MO_metro_counties]
# IL_metro_county_codes = [fetch_county_code(county, state="Illinois") for county in IL_metro_counties]

# One list of (tract name, tract code) pairs per county code.
MO_metro_tract_codes = [
    fetch_tract_codes_by_county(MO_state_code, county_code)
    for county_code in MO_metro_county_codes
]
IL_metro_tract_codes = [
    fetch_tract_codes_by_county(IL_state_code, county_code)
    for county_code in IL_metro_county_codes
]
Пример #21
0
import censusdata
import pandas as pd
import sqlite3
import ssl
import re
import os
# Display settings: do not wrap wide frames, show two decimal places.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

# %%
# Print the variables of ACS 5-year (2018) table B01001
# (aggregate population by age)
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B01001'))

# %%
# Geographies by state > place, for state '12'
censusdata.geographies(censusdata.censusgeo([('state', '12'), ('place', '*')]),
                       'acs5', 2018)

# %%
# Geographies by state > county, for state '12'
censusdata.geographies(
    censusdata.censusgeo([('state', '12'), ('county', '*')]), 'acs5', 2018)

# %%
# Geographies by state > county > county subdivision,
# for county '057' of state '12'
censusdata.geographies(
    censusdata.censusgeo([('state', '12'), ('county', '057'),
                          ('county subdivision', '*')]), 'acs5', 2018)

# %%
# By State>County>County Subdivision>Place or Remainder
censusdata.geographies(
Пример #22
0
    'B02001_005E': 'asian_pop',
    'B25024_001E': 'units_in_struc',
    'B25004_001E': 'vacancy',
    'B15003_002E': 'edu_no_schooling',
    'B15003_017E': 'edu_hsd',
    'B15003_022E': 'edu_bs',
    'B15003_023E': 'edu_ms',
    'B15003_025E': 'edu_phd'
}
# Name of the output table.
table_name = "census_data"

### End editable section

##Part 1 - get data from Census
# Look up the geography of the chosen state by name in the 2018 ACS 5-year
# state listing.
# NOTE(review): the API key is hard-coded (twice) in this script; consider
# moving it to configuration.
states = censusdata.geographies(censusdata.censusgeo([('state', '*')]),
                                'acs5',
                                2018,
                                key='db8c95da0a4bf1d0f0b43c6e66158daaef578790')
stategeo = states[state]

# List every county of that state, reusing the state's first hierarchy
# entry as the parent level.
counties = censusdata.geographies(
    censusdata.censusgeo([stategeo.params()[0], ('county', '*')]),
    'acs5',
    2018,
    key='db8c95da0a4bf1d0f0b43c6e66158daaef578790')
countylist = list(counties.values())

#For each county in your chosen state, this will create a dataframe of all of the chosen variables down to the block group level
for county in countylist:
    params = county.params()
    if (county == countylist[0]):
        data = censusdata.download(
def load_acs_data(conn, variables, variable_names, table_name, survey, year):
    """
    Downloads specified data about the state of New York from the ACS and creates a table in the database

    County-level mean income (the mean of the five B19081 variables) is
    downloaded first; the requested variables are then downloaded at block
    group level one county at a time, merged with the county income and
    written to ``data_exploration.<table_name>``.

    Arguments:
    - conn: db connection
    - variables: list of variable names to pull from ACS survey
    - variable_names: dictionary containing the variables as keys and the desired column names as values
    - table_name: name of table to create in db
    - survey: name of acs survey to pull data from
    - year: year of acs survey data should be pulled from
    """
    # Local import: this block now calls pd.concat directly.
    import pandas as pd

    # NOTE(review): the API key is hard-coded; consider moving it to
    # configuration.
    api_key = 'db8c95da0a4bf1d0f0b43c6e66158daaef578790'

    # drop table if already exists
    run_sql_from_string(conn,
                        f'drop table if exists data_exploration.{table_name}')

    # mean income variables
    inc_vars = [
        'B19081_001E', 'B19081_002E', 'B19081_003E', 'B19081_004E',
        'B19081_005E'
    ]

    # get county income data
    countydata = censusdata.download(
        survey,
        year,
        censusdata.censusgeo([('state', '36'), ('county', '*')]),
        inc_vars,
        key=api_key)
    countydata['mean_county_income'] = countydata.mean(axis=1)
    # the index holds geography objects; keep only the county code
    countydata['county'] = [idx.geo[1][1] for idx in countydata.index]
    countydata.reset_index(inplace=True)
    countydata = countydata.drop(['index'], axis=1)

    # get data from census, one download per county
    census_geo = [('state', '36'), ('county', '*')]
    counties = censusdata.geographies(
        censusdata.censusgeo(census_geo),
        survey,
        year,
        key=api_key)
    countylist = list(counties.values())

    # Collect the per-county frames and concatenate once:
    # DataFrame.append was removed in pandas 2.0.
    county_frames = []
    for county in tqdm(countylist, desc='Load ACS data'):
        params = county.params()
        county_frames.append(
            censusdata.download(
                survey,
                year,
                censusdata.censusgeo(
                    [params[0], params[1], ('block group', '*')]),
                variables,
                key=api_key))
    data = pd.concat(county_frames)

    # transform data: split the geography index into one column per level
    data.rename(columns=variable_names, inplace=True)
    data.reset_index(inplace=True)
    for i, col_name in enumerate(['state', 'county', 'tract', 'block group']):
        data[col_name] = data['index'].apply(lambda col: str(col.geo[i][1]))
    data = data.drop(['index'], axis=1)

    # merge county income data with other ACS data
    data = data.merge(countydata[['county', 'mean_county_income']],
                      'left',
                      on='county',
                      copy=False)

    # load on database
    data.to_sql(table_name, conn, schema='data_exploration', index=False)
Пример #24
0
# Load the first sheet of the ACS 2017 table-shell workbook.
table_shell = os.path.join(input_drive, 'ACS2017_Table_Shells.xlsx')
xl = pd.ExcelFile(table_shell)
table_shell_df = xl.parse(xl.sheet_names[0])
# variables I've flagged to use (rows whose 'Use' column equals 1);
# they are printed, saved to CSV and collected as a list of TableIDs
use_vars = table_shell_df[table_shell_df.Use == 1]
print(use_vars[['TableID', 'Stub', 'Use']])
use_vars.to_csv(os.path.join(input_drive, 'ACS_variables.csv'))
variables = use_vars.TableID.tolist()

# Use the census data package
# Examples of functionality: label search, table printout, and the state /
# county geography listings (state '08' for the county example)
censusdata.search('acs5', 2017, 'label', 'unemploy')
# censusdata.search('acs5', 2017, 'concept', 'education')
censusdata.printtable(censusdata.censustable('acs5', 2017, 'B23025'))
censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2017)
censusdata.geographies(
    censusdata.censusgeo([('state', '08'), ('county', '*')]), 'acs5', 2017)

# doesn't seem like the C variables work, so remove them
# NOTE(review): this drops every ID containing the letter 'C' anywhere, not
# only IDs that start with 'C' - confirm that is intended.
variables = [var for var in variables if 'C' not in var]
# also drop every ID containing "B17002"
variables = [var for var in variables if "B17002" not in var]

# loop through all variables and merge data together
count = 0
for variable in variables:
    print(variable)
    data = censusdata_pull(variable)

    if count == 0:
        full_data = data
Пример #25
0
# Open the database and run the table-creation script.
conn = sqlite3.connect('RCV.sqlite')
cur = conn.cursor()

sql_script = './queries/create_tb.sql'
# Read the script inside a context manager so the file handle is closed.
with open(sql_script) as script_file:
    queryfile = script_file.read()
cur.executescript(queryfile)

# Gather State Data
table = 'states'
cur.execute('SELECT count(*) FROM "{}"'.format(table))
records = cur.fetchall()[0][0]
if records > 0:
    # Table already populated: report and leave it untouched.
    print('Table: "{0}" exists with {1} records'.format(table, records))
else:
    # Fetch the 2018 ACS 5-year state listing and insert one row per state.
    states = censusdata.geographies(censusdata.censusgeo([('state', '*')]),
                                    'acs5',
                                    2018,
                                    key=PUBLIC_KEY)
    records = 0
    for state in states:
        records = records + 1
        fips = states[state].params()[0][1]
        try:
            cur.execute(
                'INSERT OR IGNORE INTO states (name, fips) VALUES (?,?)',
                (state, fips))
            conn.commit()
        # NOTE(review): sqlite3 reports failures as sqlite3.Error subclasses,
        # so this handler looks unlikely to ever fire - confirm the intended
        # exception type.
        except EOFError:
            print('Error inserting state:', state, fips)
            break
    print('Created: "states" table with {0}'.format(records))
Пример #26
0
import descartes, matplotlib.pyplot as plt
import fiona
import geopandas as gpd
import pandas as pd
import censusdata
import gc
# Run a garbage-collection pass before loading the data.
gc.collect()

# Read the 2018 block-group shapefile for state 25.
# NOTE(review): this is an absolute, user-specific path.
fp = "/Users/kmbrgandhi/Documents/Housing/Property_Assessments/propertyassessments/ACS2018/tl_2018_25_bg/tl_2018_25_bg.shp"
acs2018sf = gpd.read_file(fp)

# County listing (name -> geography) for state 25 (Massachusetts).
counties = censusdata.geographies(
    censusdata.censusgeo([('state', '25'), ('county', '*')]), 'acs5', 2018)

# NOTE(review): list_of_counties is never filled in the code shown here.
list_of_counties = []
# county name -> block-group-level DataFrame for that county
county_data = {}
for county in counties:
    # Drop the trailing 15 characters, the length of ", Massachusetts".
    county_name = county[:-15]
    county_val = counties[county]
    # Second hierarchy entry of the geography is the county code.
    county_number = county_val.geo[1][1]
    county_data[county_name] = censusdata.download('acs5', 2018,
                             censusdata.censusgeo([('state', '25'), ('county', county_number), ('block group', '*')]),
                             ['B02001_001E', 'B02001_003E', "B03003_001E", "B03003_003E", "B25003_002E", "B25003_003E", \
                             'B15003_002E', 'B15003_003E', 'B15003_004E', 'B15003_005E', 'B15003_006E', 'B15003_007E', \
                             'B15003_008E', 'B15003_009E', 'B15003_010E', 'B15003_011E', 'B15003_012E', 'B15003_013E', \
                             'B15003_014E', 'B15003_015E', 'B15003_016E', 'B15003_017E','B15003_018E', 'B15003_019E',\
                              'B15003_020E', 'B15003_021E', 'B15003_022E', 'B15003_023E','B15003_024E', 'B15003_025E'])
    # grabbing the Massachusetts data we want for each county
# B02001 is the race table: pulling black and total to form a ratio
# B03003 is the Hispanic table: pulling Hispanic and total to form a ratio
# B19013 is overall median income; Black- and white-specific median incomes
# could be taken from B19013A and B19013B
# NOTE(review): no B19013 variable is requested in the download above.
!apt install libspatialindex-dev
!pip install CensusData geopandas earthpy

"""For working with Census data, we're going to use the CensusData package. The CensusData package lets us access ACS data for a number of years, back until 2005 for 5-year estimates. Here is the [documentation](https://jtleider.github.io/censusdata/).

Here's an [example notebook](https://github.com/jtleider/censusdata/blob/master/docs/notebooks/example1.ipynb) which lets us see tract-level estimates for unemployment and high school dropout rates in Cook County, Illinois.
"""

import censusdata  
import pandas as pd
import numpy as np

"""You can get a link to all the Census geographies [here](https://jtleider.github.io/censusdata/geographies.html). Census geographies are defined on a hierarchical level. Since we're interested in the Austin-Round Rock MSA, we can first get all the MSAs, as well as all the tracts in Texas."""

## Get all the MSAs (2016 ACS 5-year) in a dictionary keyed by name
msas = censusdata.geographies(censusdata.censusgeo([('metropolitan statistical area/micropolitan statistical area', '*')]), 'acs5', 2016)

## Get all the census tracts in state 48 (Texas)
tx_tracts = censusdata.geographies(censusdata.censusgeo([('state', '48'), ('tract', '*')]), 'acs5', 2016)

## Show the first ten tract names
list(tx_tracts.keys())[0:10]

## Get the Austin-Round Rock MSA by its name
msas['Austin-Round Rock, TX Metro Area']

## Get all the counties of state 48 within MSA 12420
## (presumably the Austin-Round Rock code shown by the lookup above)
msas_counties = censusdata.geographies(censusdata.censusgeo([('state', '48'), ('metropolitan statistical area/micropolitan statistical area', '12420'), ('county', '*')]), 'acs5', 2016)

msas_counties
Пример #28
0
import numpy as np
import censusdata
import re

# In[2]:

# received in email by filling out https://api.census.gov/data/key_signup.html
# took a while after 'activating' to actually become active
# NOTE(review): the API key is committed in source; consider moving it to
# configuration.
KEY = "569913f1c2bf1c28df9b1f0c05f120b8691ef91b"

# In[3]:

# State listing (name -> geography) from the 2015 ACS 5-year data
# pennsylvania is 42
state_dict = censusdata.geographies(censusdata.censusgeo([
    ('state', '*'),
]),
                                    'acs5',
                                    2015,
                                    key=KEY)

# In[4]:

# Reverse lookup: state code -> state name
state_dict_lookup = {v.geo[0][1]: k for k, v in state_dict.items()}
state_dict_lookup

# In[5]:

# allegheny is '003' for testing
county_dict = censusdata.geographies(censusdata.censusgeo([('state', '42'),
                                                           ('county', '*')]),
                                     'acs5',
                                     2015,
Пример #29
0
    "B15003_017E": "HighSchoolEducation",
    "B15003_019E": "College_Less_1yrEducation",
    "B15003_020E": "College_More_1YREducation",
    "B15003_021E": "AssociateDegreeEducation",
    "B15003_022E": "BachelorDegreeEducation",
    "B15003_023E": "MasterDegreeEducation",
    "B15003_024E": "ProfessionalDegreeEducation",
    "B15003_025E": "DoctorateDegreeEducation"
}

# ACS variable codes to request (the keys of variables_map)
variables_selected = list(variables_map.keys())

# In[276]:

# Look up the selected state's geography by name in the 2018 ACS 5-year
# state listing and keep its first (level, value) pair
state_geoMap = censusdata.geographies(censusdata.censusgeo([('state', '*')]),
                                      'acs5',
                                      2018,
                                      key=_ACS_api_key)
state_censusgeo = state_geoMap[state_selected]
state_geo_Param = state_censusgeo.params()[0]

# In[277]:

# list all counties in that state: build the state + county-wildcard query
state_counties = censusdata.censusgeo([state_geo_Param, ('county', '*')])

# In[271]:

# Resolve the query into a county listing (name -> geography)
stateCounty_geoMap = censusdata.geographies(state_counties,
                                            'acs5',
                                            2018,
                                            key=_ACS_api_key)
Пример #30
0
"""
Created on Sat Sep 12 15:08:15 2020

@author: ckall
"""

#pip install CensusData
import pandas as pd
import censusdata
import re

# Display settings: do not wrap wide frames, show two decimal places.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

# Every county of every state from the 2018 ACS 5-year data; the last
# positional argument is the API key.
# NOTE(review): the API key is hard-coded; consider moving it to
# configuration.
geo_county = censusdata.geographies(
    censusdata.censusgeo([('state', '*'), ('county', '*')]), 'acs5', 2018,
    '815f420d7978fe1866f81df31947126f9e1cc460')
geo_county_keys = list(dict.keys(geo_county))

# Group county names by state: state name -> list of county names.
dict_county_name = dict()
for i in geo_county_keys:
    # k: text before the first comma (the county part of "county, state")
    k = ((i[:i.find(',')].rstrip()))
    # j: text from the first ", " onwards with every ", " removed
    # (the state part)
    j = (i[i.find(', '):]).replace(', ', '')
    if j not in dict_county_name:
        dict_county_name[j] = [k]
    else:
        value = k
        county = dict_county_name[j]
        county.append(value)
        dict_county_name[j] = county