def make_division_name(state, district):
    """Return the human-readable name of a congressional division.

    At-large states (single-district, district == 1) are named by the state
    alone; otherwise the ordinal district phrase is produced.
    """
    state_name = states.lookup(state).name
    if state in AT_LARGE_DISTRICTS and district == 1:
        return state_name
    return "{state}'s {district} congressional district".format(
        state=state_name, district=ordinalize(district))
def get_counties_in_state(census_request, state_fips_code, max_number_of_counties=math.inf, specific_counties_only=None):
    """Fetch county records (dicts with a 'NAME' key) for one state.

    When specific_counties_only is given, only those counties are returned
    (in order, with None for names not found); otherwise up to
    max_number_of_counties counties come back.
    """
    counties = census_request.sf1.get(
        fields='NAME',
        geo={
            'for': 'county:*',
            'in': 'state:{0}'.format(state_fips_code)
        })
    # Strip the trailing ", <State Name>" the API appends to county names.
    state_name = states.lookup(state_fips_code).name
    for county in counties:
        county['NAME'] = county['NAME'].replace(', {0}'.format(state_name), '')
    if specific_counties_only is not None:
        counties = [
            next((entry for entry in counties
                  if entry['NAME'] == '{0} County'.format(wanted)), None)
            for wanted in specific_counties_only
        ]
    if max_number_of_counties == math.inf:
        max_number_of_counties = len(counties)
    return counties[:max_number_of_counties]
def __call__(self, form, field):
    """Validate that *field* holds a valid US phone number whose area-code
    prefix maps to the same state the user selected in ``form.state``.

    Raises ValidationError if the number cannot be parsed, is not a valid
    US number, or its area code belongs to a different state.
    """
    # Get the selected state
    form_state = State(int(form.state.data))
    # Parse the phone number into constituent parts
    try:
        phone = phonenumbers.parse(field.data, 'US')
    except phonenumbers.NumberParseException:
        raise ValidationError(self.ERROR_MSG)
    # Check if the parsed number is a valid pattern for the US region
    if not phonenumbers.is_valid_number(phone):
        raise ValidationError(self.ERROR_MSG)
    # Check area prefix is valid for US states, and matches the selected state
    longest_prefix = 4  # Only return results for state name description (1XXX) (excl. city names)
    _state = _prefix_description_for_number(US_PHONE_GEODATA, longest_prefix, phone, 'en')
    # Assert-based flow: any failed step below means the prefix is unusable.
    try:
        assert _state
        lookup_state = states.lookup(_state)
        assert lookup_state
        state = State[lookup_state.abbr]  # Raises KeyError if state abbreviation not in local states
        assert state == form_state  # Check that specified state same as parsed state
    except (AssertionError, KeyError):
        raise ValidationError('Invalid area code prefix')
def read(self, geo, params):
    '''
    Queries Census API using the query variables filtered in self.api_variable

    :param geo: geography filters. e.g. {'state': 'OH', 'county': '*'}
        use '*' for 'all', which would return all as individual rows.
    :type geo: dict
    :param params: household parameters. e.g. {'type': 'husband_wife',
        'has_children': True, 'children_age': 'under_6',
        'race': ['black', 'asian']}
    :type params: dict
    :return: DataFrame of results from query
    :rtype: pandas.DataFrame
    '''
    self.geo = geo
    self.params = params
    self.filter_api_variable()
    logger.info('Looking up the following variables\n%s' % self.api_variables)
    rows = self.query_census(self.api_variables['row_id'].tolist())
    frame = pandas.DataFrame(rows)
    if not frame.empty:
        # Convert geography codes to friendlier representations.
        if 'state' in self.geo:
            frame['state'] = frame['state'].apply(
                lambda state_fips: states.lookup(state_fips).abbr)
        if 'county' in self.geo:
            frame['county'] = frame['county'].astype(float)
    # Horizontal sum of the queried P038* tables gives the household count.
    frame['households'] = frame.filter(regex=('P038.*')).astype(float).sum(axis=1)
    wanted = ['households'] + list(self.geo.keys())
    return frame[wanted]
def read(self, geo, params):
    '''
    Queries Census API using the query variables filtered in self.api_variable

    :param geo: geography filters. e.g. {'state': 'OH', 'county': '*'}
        use '*' for 'all', which would return all as individual rows.
    :type geo: dict
    :param params: population parameters. a dictionary of only 'sex',
        'age', 'race' allowed. e.g. {'sex': 'male', 'age': range(20, 25),
        'race': ['asian', 'white']}
    :type params: dict
    :return: DataFrame of results from query
    :rtype: pandas.DataFrame
    '''
    self.geo = geo
    self.params = params
    self.filter_api_variable()
    logger.info('Looking up the following variables\n%s' % self.api_variables)
    rows = self.query_census(self.api_variables['row_id'].tolist())
    frame = pandas.DataFrame(rows)
    if not frame.empty:
        # Convert geography codes to friendlier representations.
        if 'state' in self.geo:
            frame['state'] = frame['state'].apply(
                lambda state_fips: states.lookup(state_fips).abbr)
        if 'county' in self.geo:
            frame['county'] = frame['county'].astype(float)
    # Horizontal sum of the queried PCT* tables gives the population count.
    frame['population'] = frame.filter(regex=('PCT.*')).astype(float).sum(axis=1)
    wanted = ['population'] + list(self.geo.keys())
    return frame[wanted]
def getCountiesInState(stateFIPSCode, maxNumberOfCounties=math.inf, specificCountiesOnly=None):
    """Return county records (dicts with a 'NAME' key) for one state.

    :param stateFIPSCode: FIPS code of the state to query.
    :param maxNumberOfCounties: cap on how many counties are returned;
        math.inf (the default) means "all of them".
    :param specificCountiesOnly: optional iterable of county base names
        (without the " County" suffix). When given, only those counties are
        returned, in the given order, with None for names that are not found.
    """
    requestedCounties = censusRequest.sf1.get(
        fields='NAME',  # note: ('NAME') was a parenthesized string, not a tuple
        geo={
            'for': 'county:*',
            'in': 'state:{0}'.format(stateFIPSCode)
        })
    # Clean up county names after the API update: remove ", StateName".
    stateName = states.lookup(stateFIPSCode).name
    for requestedCounty in requestedCounties:
        requestedCounty['NAME'] = requestedCounty['NAME'].replace(
            ', {0}'.format(stateName), '')
    if specificCountiesOnly is not None:  # PEP 8: identity test, not "!= None"
        listOfSpecificCounties = []
        for specificCounty in specificCountiesOnly:
            matchingCounty = next(
                (item for item in requestedCounties
                 if item['NAME'] == '{0} County'.format(specificCounty)), None)
            listOfSpecificCounties.append(matchingCounty)
        requestedCounties = listOfSpecificCounties
    if maxNumberOfCounties == math.inf:
        maxNumberOfCounties = len(requestedCounties)
    return requestedCounties[:maxNumberOfCounties]
def covidtracking_ustates():
    """Load cached covidtracking data and 2019 census population estimates.

    Returns (states_df, states_info): per-state daily records with a few
    selected columns, and a per-state frame (abbreviation, name,
    POPESTIMATE2019) sorted by estimated population.
    """
    # ========== Import: covidtracking data (see data/covidtracking_update.py)
    with open("../data/covidtracking/covidtracking_dfs.pickle", "rb") as file:
        covidtracking_dfs = pickle.load(file)
    state_census = pd.read_csv(
        '../data/usa_census/SCPRC-EST2019-18+POP-RES.csv')
    # ========== Cleaning: convert date columns to datetime.
    for table in ['states_daily']:  # see covidtracking_dfs.keys()
        covidtracking_dfs[table]['date'] = pd.to_datetime(
            covidtracking_dfs[table]['date'], format="%Y%m%d")
    # We are going to use just a few columns for now.
    states_df = covidtracking_dfs['states_daily'].loc[
        :, ['state', 'date', 'negative', 'positive', 'death']]
    # Attach the estimated 2019 population for each state.
    state_abrs = states_df['state'].unique()
    state_names = [states.lookup(abr).name for abr in list(state_abrs)]
    states_info = pd.DataFrame(dict(abbreviation=state_abrs, name=state_names))
    states_info = states_info.merge(
        state_census.loc[:, ['NAME', 'POPESTIMATE2019']],
        right_on='NAME',
        left_on='name',
        how='left').sort_values('POPESTIMATE2019')
    states_info.drop(columns=['NAME'], inplace=True)
    return (states_df, states_info)
def find_state(text):
    """Best-effort extraction of a US state from free-form location text.

    Returns ['United States', 'USA', <matched token>, <state abbr>] when a
    state name, known city, or state token is recognized; otherwise None.
    """
    # Full state names first.
    for pattern in state_names:
        if search(pattern, text, IGNORECASE):
            return ['United States', 'USA', pattern, lookup(str(pattern)).abbr]
    # Then known city names mapped to their state.
    matches = match(city_to_state_pattern, text.lower())
    if matches:
        k = matches.group(0)
        tokens = [city_to_state_dict.get(k.title(), np.nan)]
    else:
        # Split on whitespace/commas; drop tokens that collide with state
        # abbreviations but are common English words ('in', 'or', ...).
        # Raw strings: "\s" / "\w" are invalid escapes in non-raw literals.
        tokens = [j for j in split(r"\s|,", text)
                  if j not in ['in', 'la', 'me', 'oh', 'or']]
    for i in tokens:
        if match(r'\w+', str(i)):
            if lookup(str(i)):
                return ['United States', 'USA', i, lookup(str(i)).abbr]
    return None  # explicit: nothing recognized
def query_census(self, symbols):
    '''
    Queries US census using the census python API.

    :param symbols: variables from
        http://api.census.gov/data/2010/sf1/variables.html to query
    :type symbols: list[str]
    :return: results of the query for the configured geography
    :rtype: pandas.DataFrame
    '''
    state_fips = '*'
    if 'state' in self.geo and self.geo['state'] != '*':
        state_fips = states.lookup(self.geo['state']).fips
    geo_keys = set(self.geo.keys())
    logger.info('Querying using filters: ' + str(self.geo))
    # Dispatch on the exact combination of geography filters supplied.
    if geo_keys == {'state'}:
        return self.census_api.state(symbols, state_fips)
    if geo_keys == {'state', 'county'}:
        return self.census_api.state_county(symbols, state_fips,
                                            self.geo['county'])
    if geo_keys == {'state', 'county', 'subdivision'}:
        return self.census_api.state_county_subdivision(
            symbols, state_fips, self.geo['county'], self.geo['subdivision'])
    if geo_keys == {'state', 'county', 'tract'}:
        return self.census_api.state_county_tract(
            symbols, state_fips, self.geo['county'], self.geo['tract'])
    if geo_keys == {'state', 'place'}:
        return self.census_api.state_place(symbols, state_fips,
                                           self.geo['place'])
    if geo_keys == {'state', 'district'}:
        return self.census_api.state_district(symbols, state_fips,
                                              self.geo['district'])
    if geo_keys == {'state', 'msa'}:
        return self.census_api.state_msa(symbols, state_fips, self.geo['msa'])
    if geo_keys == {'state', 'csa'}:
        return self.census_api.state_csa(symbols, state_fips, self.geo['csa'])
    if geo_keys == {'state', 'district', 'place'}:
        return self.census_api.state_district_place(
            symbols, state_fips, self.geo['district'], self.geo['place'])
    if geo_keys == {'state', 'zipcode'}:
        return self.census_api.state_zipcode(symbols, state_fips,
                                             self.geo['zipcode'])
def main(state_abbreviation):
    """Run the districting pipeline for one state: fetch census block data,
    format it, look up the number of congressional districts, build the
    districts, and write them out as GeoJSON.

    :param state_abbreviation: two-letter state abbreviation (e.g. 'MI').
    """
    import os
    # Prefer an environment variable; fall back to the historical inline key
    # so existing invocations keep working.
    # NOTE(review): the key should not live in source control at all.
    api_key = os.environ.get("CENSUS_API_KEY",
                             "78ae8c422513eb7551e52f2adf65ee6b51847b9d")
    state_info = states.lookup(state_abbreviation)
    get_block_data(api_key, state_info)
    format_block_data(state_info)
    num_congressional_districts = get_num_congressional_districts(
        api_key, state_info)
    # 0.03 = allowed population deviation between districts.
    # NOTE(review): presumed from usage — confirm with create_districts.
    districts = create_districts(state_info, num_congressional_districts, 0.03)
    save_geojson(districts, state_info)
def get_census_data():
    """Query ACS 5-year estimates for every congressional district and return
    a cleaned DataFrame with population columns, state name, and a formatted
    district label ('Namelsad')."""
    cong = C.acs5.state_congressional_district(list(CODES.keys()), "*", "*")
    cong = pd.DataFrame(cong)
    # Converting FIPS to state names.
    # (This line was duplicated in the original; computing it once suffices.)
    cong['Name'] = cong['state'].apply(lambda x: str(states.lookup(x)))
    # Renaming code columns
    cong.rename(columns=CODES, inplace=True)
    # Dropping obsolete congressional districts and Puerto Rico
    cong = cong[cong['congressional district'] != 'ZZ']
    cong = cong[cong['Name'] != 'Puerto Rico']
    # String cleaning with helper function
    cong['Namelsad'] = \
        cong['congressional district'].apply(generate_inputs.format_district)
    cong.sort_values(by=['Name', 'congressional district'], inplace=True)
    df = cong[[
        'Mexican-American Population', 'Latino Population',
        'Total Population', 'Name', 'Namelsad'
    ]]
    return df
def _special_case_nameattr_equivalence(nameattr_a: x509.NameAttribute, nameattr_b: x509.NameAttribute):
    """
    Return true if the two name attributes are equivalent for some special case.
    Assumes both nameattr have same oid
    :param nameattr_a:
    :param nameattr_b:
    :return:
    """
    # If it is a US state and the names are just long/short forms of the same
    # state (i.e. CA == California), treat them as equivalent.
    if nameattr_a.oid == NameOID.STATE_OR_PROVINCE_NAME:
        state_a = states.lookup(nameattr_a.value)
        state_b = states.lookup(nameattr_b.value)
        # BUG FIX: states.lookup returns None for unrecognized values, so two
        # unrelated non-US provinces used to compare None == None -> True.
        if state_a is not None and state_a == state_b:
            return True
    # add in more special cases here
    return False
def main():
    """Load cities for every state listed in state_abbrs.txt and export them.

    Reads the state abbreviation list and the FIPS code table, then for each
    state filters the table to that state's rows and loads its cities.
    """
    with open('output_archive/state_abbrs.txt') as fin:
        state_abbrs = [x.strip() for x in fin.readlines()]
    # The FIPS table is the same for every state: read it once, not per state.
    all_fips = pd.read_csv('output_archive/fips_codes.txt',
                           sep='\t',
                           encoding='utf-16')
    output = []
    for state in state_abbrs:
        print(state)
        state_fips_code = states.lookup(state).fips
        state_fips = all_fips[all_fips['State Code (FIPS)'] == int(
            state_fips_code)]
        output.append(
            load_cities(get_city_list(state_fips), state, state_fips_code,
                        state_fips))
    export_output(output)
def generate_details(self, **kwargs) -> dict:
    """Generate a randomized character-profile dict.

    Combines a faker profile with genre, pronouns, demographic coin flips,
    a US state parsed from the generated address, star signs, and an
    optional 'inspired_by' relation.  The probability constants appear to be
    demographic rates — source not verifiable from here.
    """
    details: Dict[str, Any] = {'genre': random.choice(['fantasy'])}
    details.update(self.fake.profile(sex=None))
    details['pronouns'] = get_pronouns(details['sex'])
    # Independent Bernoulli draws for demographic flags.
    details['is_gay'] = random.random() < .0195
    details['is_married'] = random.random() < .43
    details['has_kids'] = random.random() < .74
    if details['is_married']:
        if details['is_gay']:
            # Same-sex spouse: same-gendered term as the character.
            details['spouse'] = {
                'F': 'wife',
                'M': 'husband'
            }[details['sex']]
        else:
            details['spouse'] = {
                'F': 'husband',
                'M': 'wife'
            }[details['sex']]
    # Re-draw addresses until one contains a token matching state_pattern.
    addr = self.state_pattern.search(details['residence'])
    while not addr:
        details['residence'] = self.fake.address()
        addr = self.state_pattern.search(details['residence'])
    details['state'] = states.lookup(addr.group(1)).name
    details['signs'] = {
        'astrological': get_astrological_sign(details['birthdate']),
        'zodiac': get_zodiac_sign(details['birthdate'])
    }
    # 30% of characters get an "inspired by" relative of a random sex.
    if random.random() < .3:
        details['inspired_by'] = {'sex': random.choice(['F', 'M'])}
        details['inspired_by']['relation'] = random.choice({
            'F': ['mother', 'sister', 'daughter'],
            'M': ['father', 'brother', 'son']
        }[details['inspired_by']['sex']])
    else:
        details['inspired_by'] = None
    return details
def generate_2018_fips_df(path: str = FIPS_2018_URL) -> pd.DataFrame:
    """Downloads raw FIPS data from the 2018 source and transforms to the proper format.

    Output columns: state abbreviation, state code, county code, county
    name, and a concatenated state+county FIPS column.
    """
    # skiprows drops the spreadsheet preamble above the header row.
    fips_df = pd.read_excel(path, dtype=str, engine="openpyxl", skiprows=range(4))
    fips_df = fips_df.drop([FIPS_2018_SUMMARY_COL], axis="columns")
    # Filter to counties only
    fips_df = fips_df[(
        # Filter out sub-county level fips
        (fips_df[FIPS_2018_COUNTY_SUBDIVISION_COL] == "00000")
        & (fips_df[FIPS_2018_PLACE_COL] == "00000")
        & (fips_df[FIPS_2018_CITY_COL] == "00000")
        # Filter out state fips
        & (fips_df[FIPS_2018_COUNTY_COL] != "000"))]
    fips_df = fips_df.reset_index(drop=True)
    # Drop unnecessary columns
    fips_df = fips_df.drop(
        [
            FIPS_2018_COUNTY_SUBDIVISION_COL, FIPS_2018_PLACE_COL,
            FIPS_2018_CITY_COL
        ],
        axis="columns",
    )
    # Rename columns
    fips_df = fips_df.rename(
        columns={
            FIPS_2018_STATE_COL: STATE_CODE_COL,
            FIPS_2018_COUNTY_COL: COUNTY_CODE_COL,
            FIPS_2018_AREA_NAME_COL: COUNTY_NAME_COL,
        })
    # Add column with state abbreviation (explicit FIPS-field lookup).
    abbrev_col = fips_df.state_code.apply(
        lambda code: states.lookup(code, field="fips").abbr)
    fips_df.insert(loc=0, column=STATE_ABBREV_COL, value=abbrev_col)
    # Add columns with concatenated fips (state code + county code).
    fips_df[FIPS_COL] = fips_df[STATE_CODE_COL] + fips_df[COUNTY_CODE_COL]
    return fips_df
def transform_population_df(pops_df: pd.DataFrame) -> pd.DataFrame: """Transforms the population data int to a more usable format. Adds fips information and unpivots the year columns into a single year column.""" # First column is missing name, name it "location" pops_df = pops_df.rename(columns={pops_df.columns[0]: TEMP_LOCATION_COL}) # Just keep the 2010 to 2019 estimated columns pops_df = pops_df.drop(["Census", "Estimates Base"], axis="columns") # County rows start with ".", remove it pops_df = pops_df[pops_df[TEMP_LOCATION_COL].str.startswith(".")] pops_df[TEMP_LOCATION_COL] = pops_df[TEMP_LOCATION_COL].str[1:] # Location is of form "county name, state name", pull these into their own columns location_col = pops_df.pop(TEMP_LOCATION_COL).str.split(",", expand=True) pops_df[TEMP_COUNTY_NAME_COL] = location_col[0].str.strip() pops_df[TEMP_STATE_NAME_COL] = location_col[1].str.strip() # Get the state code to group by pops_df[TEMP_STATE_CODE_COL] = pops_df[TEMP_STATE_NAME_COL].apply( lambda state_name: "US_" + states.lookup(state_name, field="name" ).abbr) # Group by state, add fips to each row in each group pops_df = pops_df.groupby([TEMP_STATE_CODE_COL ]).apply(add_fips_to_state_df) # Only keep fips id column if not pops_df[FIPS_COL].is_unique: duplicate_rows = pops_df[pops_df[FIPS_COL].duplicated(keep=False)] raise ValueError( f"Dataframe contains duplicate fips:\n{duplicate_rows}", ) pops_df = pops_df.drop( [TEMP_COUNTY_NAME_COL, TEMP_STATE_NAME_COL, TEMP_STATE_CODE_COL], axis="columns") # Unpivot 2010 to 2019 columns into single year column pops_df = pops_df.melt(id_vars=[FIPS_COL], var_name=YEAR_COL, value_name=POPULATION_COL) pops_df[YEAR_COL] = pops_df[YEAR_COL].astype(int) return pops_df
def build_search(state, place):
    """Combine and normalize place name and state into search.

    Returns (state_abbr_or_None, normalized_place).  The abbreviation is
    None when no state was supplied or it cannot be resolved.
    """
    state, place = state.strip(), place.strip().lower()
    if state == '':
        return None, place
    place = re.sub(r'^close to\s*', '', place)
    state = states.lookup(state)  # already stripped above
    if state is None:
        # BUG FIX: an unrecognized state used to crash on `state.name` below;
        # fall back to searching on the place string alone.
        return None, place
    if place.endswith(state.name.lower()):
        # search string contains full state name already
        return state.abbr, place
    elif place.endswith(', ' + state.abbr.lower()):
        # search string contains state abbreviation already
        return state.abbr, place
    else:
        # add state abbreviation to search string
        place = place.strip(',')
        return state.abbr, place + ', ' + state.abbr.lower()
def data():
    """Flask view: JSON description of a random comment for one district.

    Expects a `fips` query parameter holding state FIPS + district FIPS
    (zero-padded to 4 digits); aborts with 400 when missing.
    """
    fips = flask.request.args.get('fips')
    if not fips:
        flask.abort(400)
    fips = fips.zfill(4)
    state_fips, district_fips = fips[:2], fips[2:]
    state = states.lookup(state_fips)
    result = {
        'state_name': state.name,
        'state_abbr': state.abbr,
        'district': district_name(district_fips),
    }
    comment_data = (
        CommentData.query
        .filter(CommentData.state_fips == state_fips)
        .filter(CommentData.district_fips == district_fips)
        .order_by(func.random())  # NOTE: may be slow for large tables
        .first())
    if comment_data:
        result.update({
            'fcc_link':
                'https://www.fcc.gov/ecfs/filing/{}'.format(comment_data.id),
            'name': comment_data.name.title(),
            'city': comment_data.city.title(),
            'comment': comment_data.comment.replace('\n', '<br>'),
        })
    return flask.jsonify(result)
def __init__(self, census_json_data):
    """Build per-tract race population figures from one census API row.

    :param census_json_data: dict for one census row; must contain 'state',
        'county', 'tract' and the estimate/percentage variables named by
        CensusTractRacePopulation's variable constants.
    """
    self.state = states.lookup(census_json_data['state'])  # us.states entry
    self.county = census_json_data['county']
    self.tract = census_json_data['tract']
    population_total_variable_name = CensusTractRacePopulation.CENSUS_VARIABLE_TOTAL_POPULATION + \
        CensusTractRacePopulation.VARIABLE_SUFFIX_ESTIMATE
    self.population_total_est = int(census_json_data[population_total_variable_name])
    race_variable_prefixes = CensusTractRacePopulation.get_all_races()
    # Estimated count and percentage, keyed by race variable prefix.
    self.population_by_race_est = {}
    self.population_by_race_pctg = {}
    for race_prefix in race_variable_prefixes:
        est_variable_name = race_prefix + CensusTractRacePopulation.VARIABLE_SUFFIX_ESTIMATE
        pctg_variable_name = race_prefix + CensusTractRacePopulation.VARIABLE_SUFFIX_ESTIMATE_PERCENT
        self.population_by_race_est[race_prefix] = int(census_json_data[est_variable_name])
        percent = census_json_data[pctg_variable_name]
        # Clamp negative values to 0 — NOTE(review): presumably these are
        # census "data unavailable" sentinels; confirm against ACS docs.
        percent = percent if percent > 0.0 else 0.0
        self.population_by_race_pctg[race_prefix] = percent
def title(state_fips, district_fips, count):
    """Build a plot title: "<State>'s <District> District\\n<count> comments"."""
    name_of_state = states.lookup(state_fips).name
    district_label = district_name(district_fips)
    return '%s\'s %s District\n%d comments' % (
        name_of_state, district_label, count)
districtGeometries = EsriDumper( url='https://tigerweb.geo.census.gov/arcgis/rest/services/Generalized_ACS2017/Legislative/MapServer/5', extra_query_args={'where': 'STATE=\'{0}\''.format(stateFIPSCode)}) # https://github.com/openaddresses/pyesridump existingDistricts = [] for districtGeometry in districtGeometries: geoJSONGeometry = districtGeometry['geometry'] districtNumber = districtGeometry['properties']['BASENAME'] existingDistrict = ExistingDistrict(districtNumber=districtNumber, geoJSONGeometry=geoJSONGeometry) existingDistricts.append(existingDistrict) return existingDistricts stateAbbreviation = 'MI' stateInfo = states.lookup(stateAbbreviation) censusYear = 2010 descriptionToWorkWith = 'All' allCongressionalDistrictGeosInState = getAllGeoDataForFederalCongressionalDistricts(stateFIPSCode=stateInfo.fips) # save county data to file saveDataToFileWithDescription(data=allCongressionalDistrictGeosInState, censusYear=censusYear, stateName=stateInfo.name, descriptionOfInfo='{0}CurrentFederalCongressionalDistricts'.format(descriptionToWorkWith)) saveGeoJSONToDirectoryWithDescription(geographyList=allCongressionalDistrictGeosInState, censusYear=censusYear, stateName=stateInfo.name, descriptionOfInfo='CurrentFederalCongressionalDistricts')
# Counters for rows dropped while joining comment rows to districts.
zero_zip = 0
no_fips = 0
bad_state = 0
with open(infile) as fin, open(outfile, 'w') as fout:
    reader = csv.DictReader(fin)
    writer = csv.writer(fout)
    num_rows = 0
    for row in reader:
        num_rows += 1
        if not num_rows % 20000:  # progress marker every 20k rows
            print(num_rows)
        state = row['state']
        state_lookup = states.lookup(state)
        # Single-district states (plus DC/PR): district FIPS is always '00',
        # so no ZIP lookup is needed.
        if state_lookup in [states.AK, states.DC, states.DE, states.MT,
                            states.ND, states.PR, states.SD, states.VT,
                            states.WY]:
            state_fips, district_fips = state_lookup.fips, '00'
        else:
            zip_code = row['zip_code'].zfill(5)
            if zip_code == '00000':  # missing ZIP: cannot map to a district
                zero_zip += 1
                continue
            fips = ZIP2FIPS.get(zip_code)
            if fips:
                state_fips, district_fips = fips
            else:  # ZIP not present in the ZIP->FIPS table
                no_fips += 1
                continue
def chooseState():
    """Interactive entry point: run the test state, all states, or one state.

    (Python 2 code: uses `print` statements and `raw_input`.)
    """
    if debugMode:
        processState('NH', 5)  # Testing mode does NH with 5 districts
    else:
        # User input: choose to run all states or custom single state
        modeChoice = raw_input(
            'How would you like to run?\n1) Run all states using 2010 House apportionments\n2) Choose a single state\n'
        )
        # Run all states mode
        if modeChoice == '1':
            # 2010 apportionments of US House districts
            districtCounts = (('AL', 7), ('AK', 1), ('AZ', 9), ('AR', 4),
                              ('CA', 53), ('CO', 7), ('CT', 5), ('DE', 1),
                              ('FL', 27), ('GA', 14), ('HI', 2), ('ID', 2),
                              ('IL', 18), ('IN', 9), ('IA', 4), ('KS', 4),
                              ('KY', 6), ('LA', 6), ('ME', 2), ('MD', 8),
                              ('MA', 9), ('MI', 14), ('MN', 8), ('MS', 4),
                              ('MO', 8), ('MT', 1), ('NE', 3), ('NV', 4),
                              ('NH', 2), ('NJ', 12), ('NM', 3), ('NY', 27),
                              ('NC', 13), ('ND', 1), ('OH', 16), ('OK', 5),
                              ('OR', 5), ('PA', 18), ('RI', 2), ('SC', 7),
                              ('SD', 1), ('TN', 9), ('TX', 36), ('UT', 4),
                              ('VT', 1), ('VA', 11), ('WA', 10), ('WV', 3),
                              ('WI', 8), ('WY', 1))
            # Run processState on each state, skipping states with 1 district
            for state in districtCounts:
                if state[1] > 1:
                    processState(*state)
                else:
                    print "Skipping single-district state"
        # Run single state mode
        elif modeChoice == '2':
            # User input: state to process
            state = raw_input(
                'Which state would you like to process? (Two letter abbreviation) '
            )
            if not states.lookup(state):
                print "Invalid state entered!"
                exit()
            # User input: number of districts to create
            maxDistricts = raw_input('Number of districts to create? ')
            try:
                int(maxDistricts)
            except ValueError:
                print "Invalid number of districts chosen!"
                exit()
            else:
                maxDistricts = int(maxDistricts)
            # Run processState on selected state with selected number of districts
            processState(state, maxDistricts)
        else:
            print "Invalid run mode chosen!"
            exit()
def processState(state, maxDistricts):
    """Greedy density-based districting for one state. (Python 2 code.)

    Downloads Census tract geometry and gazetteer files as needed, then grows
    maxDistricts districts by repeatedly adding the densest unassigned
    neighboring tract until each district reaches the population threshold.
    Finishes by plotting a choropleth of the assignment.
    """

    def getNeighbors(d):
        # All not-yet-assigned neighbors of the tracts currently in district d.
        allNeighbors = [adjacencyMatrix.neighbors[x] for x in d]
        allNeighbors = [
            m for n in allNeighbors for m in n if m not in assignedList
        ]
        return list(set(allNeighbors))

    fips = states.lookup(state).fips
    print "Beginning districting on %s (FIPS %s) with %s districts" % (
        state, fips, maxDistricts)
    # Check if geometry file exists; download if not
    geomDir = 'data-raw/%s/geometry/' % cellularUnit
    geomFile = ('%stl_2010_%s_tract10.shp') % (geomDir, fips)
    if os.path.isfile(geomFile):
        print "Found geometry file"
    else:
        if not os.path.exists(geomDir):
            os.makedirs(geomDir)
        print "Acquiring geometry file from Census"
        urllib.urlretrieve(
            states.lookup(state).shapefile_urls('tract'),
            ('%s%s.zip') % (geomDir, state))
        with zipfile.ZipFile(('%s%s.zip') % (geomDir, state)) as zip:
            zip.extractall(geomDir)
        os.remove(('%s%s.zip') % (geomDir, state))
    # Check if gazeteer file exists; download if not
    gazDir = 'data-raw/%s/gazetteer/' % cellularUnit
    gazFile = ('%scensus_tracts_list_%s.txt') % (gazDir, fips)
    if os.path.isfile(gazFile):
        print "Found gazetteer file"
    else:
        if not os.path.exists(gazDir):
            os.makedirs(gazDir)
        print "Acquiring gazeteer file from Census"
        urllib.urlretrieve((
            'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/census_tracts_list_%s.txt'
        ) % (fips), ('%scensus_tracts_list_%s.txt') % (gazDir, fips))
    # Read in the geometry file with GeoPandas
    try:
        geometry = gpd.read_file(geomFile)
        geometry = geometry[[
            'GEOID10', 'geometry'
        ]]  # Lose everything except FIPS code and geometry
        print "Successfully read geometry file"
    except:
        print "Problem reading geometry file"
        exit()
    # Read in the gazeteer file with Pandas
    try:
        gazetteer = pd.read_table(gazFile, dtype={'GEOID': 'object'})
        gazetteer = gazetteer[[
            'GEOID', 'POP10', 'ALAND'
        ]]  # Lose everything except FIPS code, pop, land area
        gazetteer['density'] = gazetteer['POP10'] / gazetteer[
            'ALAND']  # Compute density across all tracts
        print "Successfully read gazetteer file"
    except:
        print "Problem reading gazetteer file"
        exit()
    # Create a joined data frame
    dataFrame = geometry.merge(gazetteer, left_on='GEOID10', right_on='GEOID')
    dataFrame['district'] = 0  # New blank variable for district assignment
    if holeFiller:
        fullShape = dataFrame.unary_union
    # Create an adjanceny matrix using pysal
    if contiguityType == 'rook':
        adjacencyFunction = Contiguity.Rook.from_dataframe
    elif contiguityType == 'queen':
        adjacencyFunction = Contiguity.Queen.from_dataframe
    else:
        print 'Invalid contiguity type set!'
        exit()
    adjacencyMatrix = adjacencyFunction(geometry)
    print "Adjacency matrix built"
    popThreshold = dataFrame['POP10'].sum(
    ) / maxDistricts  # How many people should be in each district
    assignedList = []  # List to hold indices all assigned tracts, for speed
    for d in range(1, maxDistricts + 1):
        districtPop = 0
        districtMembers = [
        ]  # List to hold indices of tracts assigned to this district, for speed
        seed = dataFrame[dataFrame['district'] ==
                         0]['density'].idxmax()  # Find the densest unassigned
        districtPop = districtPop + dataFrame['POP10'][seed]
        dataFrame.set_value(seed, 'district', d)
        districtMembers.append(seed)
        assignedList.append(seed)
        print "Beginning district %d, seeding with %s %s, running population %d" % (
            d, cellularUnit, dataFrame['GEOID10'][seed], districtPop)
        # Grow the district: always annex the densest available neighbor.
        while districtPop < popThreshold:
            possibleNeighbors = getNeighbors(districtMembers)
            if len(possibleNeighbors) == 0:
                print "No possible neighbors to add!"
                break
            bestNeighbor = dataFrame.iloc[possibleNeighbors]['density'].idxmax(
            )
            districtPop = districtPop + dataFrame['POP10'][bestNeighbor]
            dataFrame.set_value(bestNeighbor, 'district', d)
            districtMembers.append(bestNeighbor)
            assignedList.append(bestNeighbor)
            print "Adding %s, running population %d" % (
                dataFrame['GEOID10'][bestNeighbor], districtPop)
            if holeFiller:
                # Track the not-yet-assigned remainder of the state; a
                # MultiPolygon remainder means a hole/exclave was pinched off.
                fullShape = fullShape.difference(
                    dataFrame.iloc[bestNeighbor].geometry)
                if fullShape.geom_type == 'MultiPolygon':
                    print "A hole or exclave has been created!"
                    for part in fullShape:
                        partTracts = dataFrame[dataFrame.geometry.within(
                            part)].index.tolist()
                        partPop = dataFrame.iloc[partTracts].POP10.sum()
                        # Absorb the disconnected part only if it still fits
                        # within this district's remaining population budget.
                        if partPop < popThreshold - districtPop:
                            dataFrame.loc[partTracts, "district"] = d
                            districtPop = districtPop + partPop
                            assignedList.extend(partTracts)
                            districtMembers.extend(partTracts)
                            fullShape = fullShape.difference(part)
                            print "Filled a hole or exclave"
    # Build a choropleth map
    p = dataFrame.plot(column='district', categorical=True, legend=True)
    plt.show()
def label(self):
    """Human-readable label: the us.states entry matching self.name."""
    matched = states.lookup(self.name)
    return str(matched)
def get_state_abbr(state_name):
    """Return the two-letter abbreviation for *state_name*.

    :raises NameError: when the name does not resolve to a US state.
    """
    state = states.lookup(state_name)
    if state is None:
        raise NameError(f'No state found for {state_name}.')
    return state.abbr
## Import API census from census import Census ## Import library to decode FIPS to places from us import states ## Paste your api key in the line bellow api_key = "PASTE_YOUR_API_KEY_HERE" c = Census(api_key) c.acs5.get(('NAME', 'B25034_010E'), {'for': 'state:{}'.format(states.MD.fips)}) ## 36 = New York print(states.lookup('36').abbr) print(c.acs5.tables())
class PhiladelphiaVaccine(TableauDashboard):
    """Scraper for Philadelphia's COVID vaccine Tableau dashboard.

    Fetches the "Residents Percentage New/Full" tables and normalizes them
    into initiated/completed vaccination records for Philadelphia county
    (FIPS 42101).
    """

    # Pennsylvania state FIPS, as an int.
    state_fips = int(states.lookup("Pennsylvania").fips)
    has_location = True
    location_type = "county"
    provider = "county"
    source = (
        "https://www.phila.gov/programs/coronavirus-disease-2019-covid-19/data/vaccine/"
    )
    source_name = "Philadelphia Department of Public Health"
    baseurl = "https://healthviz.phila.gov/t/PublicHealth/"
    viewPath = "COVIDVaccineDashboard/COVID_Vaccine"
    # Template filled with dose_type "New" (first dose) or "Full".
    data_tableau_table = "Residents Percentage {dose_type}"
    variables = {
        "Residents Receiving At Least 1 Dose* ": variables.INITIATING_VACCINATIONS_ALL,
        "Fully Vaccinated Residents*": variables.FULLY_VACCINATED_ALL,
    }

    def fetch(self) -> pd.DataFrame:
        """Fetch both dose-type tables, keyed by "New"/"Full"."""
        # create a dict of the 2 dose type tables
        # which are titled "Residents Percentage New" and "... Full"
        return {
            dose_type: self.get_tableau_view(
                dose_type=dose_type)[self.data_tableau_table.format(
                    dose_type=dose_type)]
            for dose_type in ["New", "Full"]
        }

    def normalize(self, data: pd.DataFrame) -> pd.DataFrame:
        """Reshape the raw Tableau tables into the provider's long format."""
        dataframes = []
        for dose_type in ["New", "Full"]:
            dose_data = (data[dose_type].rename(
                columns={
                    "Measure Values-alias": "value",
                    "Measure Names-alias": "variable",
                }
            ).loc[:, ["value", "variable"]].query(
                "variable in"
                "['Residents Receiving At Least 1 Dose* ', 'Fully Vaccinated Residents*']"
            ).assign(
                location=42101,  # Philadelphia county FIPS
                value=lambda x: pd.to_numeric(x["value"].str.replace(",", "")),
                vintage=self._retrieve_vintage(),
            ).pipe(
                self._rename_or_add_date_and_location,
                location_column="location",
                timezone="US/Eastern",
            ))
            dataframes.append(dose_data)
        data = (self.extract_CMU(
            df=pd.concat(dataframes),
            cmu=self.variables).drop(
                columns={"variable"}).reset_index(drop=True))
        # break scraper if both init and completed variables are not included in data
        vars = {"total_vaccine_initiated", "total_vaccine_completed"}
        assert vars <= set(data["category"])
        return data

    # could not find a way to select the "Demographics New" dashboard tab in
    # the usual manner, so edit request body to manually select Demographic
    # tab/sheets
    # this is the default function with only form_data["sheet_id"] altered
    def get_tableau_view(self, dose_type, url=None):
        """Bootstrap a Tableau session and scrape every sheet into DataFrames.

        Returns a dict mapping metric/sheet name -> DataFrame whose columns
        are "<fieldCaption>-value" / "<fieldCaption>-alias".
        """
        def onAlias(it, value, cstring):
            # Negative indices point into the shared cstring dictionary.
            return value[it] if (it >= 0) else cstring["dataValues"][abs(it) - 1]

        req = requests_retry_session()
        fullURL = self.baseurl + "/views/" + self.viewPath
        reqg = req.get(
            fullURL,
            params={
                ":language": "en",
                ":display_count": "y",
                ":origin": "viz_share_link",
                ":embed": "y",
                ":showVizHome": "n",
                ":jsdebug": "y",
                ":apiID": "host4",
                "#navType": "1",
                "navSrc": "Parse",
            },
            headers={"Accept": "text/javascript"},
        )
        # The bootstrap config is embedded in a <textarea> on the page.
        soup = BeautifulSoup(reqg.text, "html.parser")
        tableauTag = soup.find("textarea", {"id": "tsConfigContainer"})
        tableauData = json.loads(tableauTag.text)
        parsed_url = urllib.parse.urlparse(fullURL)
        dataUrl = f'{parsed_url.scheme}://{parsed_url.hostname}{tableauData["vizql_root"]}/bootstrapSession/sessions/{tableauData["sessionid"]}'
        # copy over some additional headers from tableauData
        form_data = {}
        form_map = {
            "sheetId": "sheet_id",
            "showParams": "showParams",
            "stickySessionKey": "stickySessionKey",
        }
        for k, v in form_map.items():
            if k in tableauData:
                form_data[v] = tableauData[k]
        # set sheet manually to access the subsheets we need
        form_data["sheet_id"] = f"Demographics {dose_type}"
        resp = req.post(
            dataUrl,
            data=form_data,
            headers={"Accept": "text/javascript"},
        )
        # Parse the response.
        # The response contains multiple chuncks of the form
        # `<size>;<json>` where `<size>` is the number of bytes in `<json>`
        resp_text = resp.text
        data = []
        while len(resp_text) != 0:
            size, rest = resp_text.split(";", 1)
            chunck = json.loads(rest[:int(size)])
            data.append(chunck)
            resp_text = rest[int(size):]
        # The following section (to the end of the method) uses code from
        # https://stackoverflow.com/questions/64094560/how-do-i-scrape-tableau-data-from-website-into-r
        presModel = data[1]["secondaryInfo"]["presModelMap"]
        metricInfo = presModel["vizData"]["presModelHolder"]
        metricInfo = metricInfo["genPresModelMapPresModel"]["presModelMap"]
        data = presModel["dataDictionary"]["presModelHolder"]
        data = data["genDataDictionaryPresModel"]["dataSegments"]["0"][
            "dataColumns"]
        scrapedData = {}
        for metric in metricInfo:
            metricsDict = metricInfo[metric]["presModelHolder"][
                "genVizDataPresModel"]
            columnsData = metricsDict["paneColumnsData"]
            # One entry per visible column: where its value/alias indices live.
            result = [{
                "fieldCaption": t.get("fieldCaption", ""),
                "valueIndices":
                    columnsData["paneColumnsList"][t["paneIndices"][0]]
                    ["vizPaneColumns"][t["columnIndices"][0]]["valueIndices"],
                "aliasIndices":
                    columnsData["paneColumnsList"][t["paneIndices"][0]]
                    ["vizPaneColumns"][t["columnIndices"][0]]["aliasIndices"],
                "dataType": t.get("dataType"),
                "paneIndices": t["paneIndices"][0],
                "columnIndices": t["columnIndices"][0],
            } for t in columnsData["vizDataColumns"] if t.get("fieldCaption")]
            frameData = {}
            cstring = [t for t in data if t["dataType"] == "cstring"][0]
            for t in data:
                for index in result:
                    if t["dataType"] == index["dataType"]:
                        if len(index["valueIndices"]) > 0:
                            frameData[f'{index["fieldCaption"]}-value'] = [
                                t["dataValues"][abs(it)]
                                for it in index["valueIndices"]
                            ]
                        if len(index["aliasIndices"]) > 0:
                            frameData[f'{index["fieldCaption"]}-alias'] = [
                                onAlias(it, t["dataValues"], cstring)
                                for it in index["aliasIndices"]
                            ]
            df = pd.DataFrame.from_dict(frameData, orient="index").fillna(0).T
            scrapedData[metric] = df
        return scrapedData
def lookup_state(self, data):
    """Replace data['state'] with its canonical full name (mutates *data*)."""
    data["state"] = states.lookup(data["state"]).name
    return data
census_client = Census(open('API.txt').readline().strip()) # census package uses old endpoints for years before 2015 # it's a workaround monkeypatch def _switch_endpoints(year): census_client.acs5.endpoint_url = 'https://api.census.gov/data/%s/acs/%s' census_client.acs5.definitions_url = 'https://api.census.gov/data/%s/acs/%s/variables.json' census_client.acs5.definition_url = 'https://api.census.gov/data/%s/acs/%s/variables/%s.json' census_client.acs5.groups_url = 'https://api.census.gov/data/%s/acs/%s/groups.json' census_client.acs5._switch_endpoints = _switch_endpoints state = states.lookup('Texas') total_population = 'B01001_001E' household_income = 'B19001_001E' median_home_value = 'B25077_001E' median_income_value = 'B06011_001E' # all data below in only for "in labor force" male_below_poverty = 'B17005_004E' unemployed_male_below_poverty = 'B17005_006E' female_below_poverty = 'B17005_009E' unemployed_female_below_poverty = 'B17005_011E' male_above_poverty = 'B17005_015E' unemployed_male_above_poverty = 'B17005_017E' female_above_poverty = 'B17005_020E' unemployed_female_above_poverty = 'B17005_022E'
def fromStateToAbbr(state):
    """Map a state name/abbreviation/FIPS to its abbreviation, or np.nan
    when it cannot be resolved."""
    resolved = states.lookup(state)
    return resolved.abbr if resolved is not None else np.nan