def counties(self):
    """Return a dict mapping California county names to their GVid values, plus an entry for the state."""
    from geoid.acs import AcsGeoid
    from geoid.civick import GVid, State

    # Map the medium form of each county name to the county's compact GVid string.
    d = {AcsGeoid.parse(row.geoid).county_name.medium_name: str(AcsGeoid.parse(row.geoid).convert(GVid))
         for row in self.dep('geofile50') if row.state == 6}

    # Add an entry for the state itself.
    d['California'] = State(6)

    return d
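# Usage sketch (added for illustration, not part of the original source). counties() maps the
# medium form of each California county name to its compact GVid string, plus a 'California'
# entry for the state itself; `bundle` is a hypothetical object exposing the method above.
def example_counties(bundle):
    county_map = bundle.counties()
    for name, gvid in sorted(county_map.items()):
        print(name, gvid)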
def sub_geoids(v):
    """Replace state abbreviations and state FIPS codes with state and national geoids."""
    from geoid.censusnames import stusab
    from geoid.acs import Us, State, AcsGeoid

    if len(v) == 2:
        v = v.upper()

        # Invert stusab so we can look up the FIPS code by state abbreviation.
        stmap = {abbrev: fips for fips, abbrev in stusab.items()}

        if v == 'US':
            return str(Us())

        if v in stmap:
            return str(State(stmap[v]))

    # Maybe it is a state number
    try:
        v = int(v)
        return str(State(v))
    except ValueError:
        pass

    # Otherwise, assume it is already a full ACS geoid and re-serialize it.
    return str(AcsGeoid.parse(v))
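# Usage sketch (added for illustration, not part of the original source). sub_geoids() accepts a
# two-letter state abbreviation, a state FIPS code, 'US', or a full ACS geoid string, and returns
# the corresponding ACS geoid string; the exact output format comes from the geoid package.
print(sub_geoids('ca'))            # state abbreviation -> California state geoid
print(sub_geoids('06'))            # state FIPS code -> the same state geoid
print(sub_geoids('US'))            # national geoid
print(sub_geoids('05000US06073'))  # already an ACS geoid (San Diego County); parsed and re-serialized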
def shape_url(self):
    """Return the shapefile URL"""
    from geoid.acs import AcsGeoid

    us = tiger_url(self.year, self.summary_level, AcsGeoid.parse(self.geoid).stusab)

    return parse_app_url(us)
def geo_url(self):
    """Return a url for the geofile for this Census file"""
    from geoid.acs import AcsGeoid

    us = tiger_url(self.year, self.summary_level, AcsGeoid.parse(self.geoid).stusab)

    return parse_app_url(us)
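# Note (added for illustration): shape_url() and geo_url() assume tiger_url() and parse_app_url()
# are available at module level; parse_app_url is assumed to be the rowgenerators URL parser, and
# tiger_url is assumed to build the Census TIGER download URL for a year, summary level, and state
# abbreviation. Hypothetical usage, where `source` has year, summary_level, and geoid attributes:
url = source.geo_url()
print(str(url))  # the TIGER URL for the source's state and summary level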
def __init__(self, ref, cache=None, working_dir=None, **kwargs):
    from geoid.acs import AcsGeoid
    from publicdata.census.files.metafiles import TableMeta

    super().__init__(ref, cache, working_dir, **kwargs)

    gid = AcsGeoid.parse(self.ref.geoid)

    self.table = Table(self.ref.year, self.ref.release, gid.stusab,
                       str(self.ref.summary_level), self.ref.tableid)

    self._meta = TableMeta(self.ref.year, self.ref.release)

    assert isinstance(ref, CensusUrl)
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID"""
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    m1 = ''

    try:
        return GVid.parse(v)
    except ValueError as e:
        m1 = str(e)

    try:
        return AcsGeoid.parse(v).convert(GVid)
    except ValueError as e:
        raise ValueError("Failed to parse to either ACS or GVid: {}; {}".format(m1, str(e)))
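# Usage sketch (added for illustration, not part of the original source). Either an ACS geoid
# string or a civick GVid string resolves to a GVid object; any other input raises ValueError
# carrying both parser messages.
gvid = parse_to_gvid('05000US06073')   # ACS geoid for San Diego County, CA
print(type(gvid), str(gvid))           # a geoid.civick class and its compact string form
print(parse_to_gvid(str(gvid)))        # round trip: the GVid string parses back to the same value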
def generate_all(sumlevel, d):
    """Generate a dict that includes all of the available geoid values, with keys
    for the most common names for those values."""
    from geoid.civick import GVid
    from geoid.tiger import TigerGeoid
    from geoid.acs import AcsGeoid

    sumlevel = int(sumlevel)
    d = dict(d.items())

    # Map common name variants
    if 'cousub' in d:
        d['cosub'] = d['cousub']
        del d['cousub']

    if 'blkgrp' in d:
        d['blockgroup'] = d['blkgrp']
        del d['blkgrp']

    if 'zcta5' in d:
        d['zcta'] = d['zcta5']
        del d['zcta5']

    gvid_class = GVid.resolve_summary_level(sumlevel)

    if not gvid_class:
        return {}

    geoidt_class = TigerGeoid.resolve_summary_level(sumlevel)
    geoid_class = AcsGeoid.resolve_summary_level(sumlevel)

    return dict(
        gvid=str(gvid_class(**d)),
        geoid=str(geoid_class(**d)),
        geoidt=str(geoidt_class(**d))
    )
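# Usage sketch (added for illustration, not part of the original source). For summary level 50
# (county) and the FIPS components of San Diego County, CA, generate_all() returns the same place
# in three encodings; the output values shown are illustrative and produced by the geoid package.
print(generate_all(50, {'state': 6, 'county': 73}))
# e.g. {'gvid': '<civick county id>', 'geoid': '05000US06073', 'geoidt': '06073'}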
def __init__(self, ref, cache=None, working_dir=None, **kwargs):
    from geoid.acs import AcsGeoid
    from publicdata.census.files.metafiles import TableMeta

    super().__init__(ref, cache, working_dir, **kwargs)

    gid = AcsGeoid.parse(self.ref.geoid)

    try:
        stusab = gid.stusab
    except AttributeError:
        # "US" level geoids don't have a state parameter.
        stusab = "US"

    self.table = Table(self.ref.year, self.ref.release, stusab,
                       str(self.ref.summary_level), self.ref.tableid)

    self._meta = TableMeta(self.ref.year, self.ref.release)

    self._source_url = kwargs.get('source_url')
    assert self._source_url
    assert isinstance(ref, CensusUrl)
def __iter__(self):
    from ambry import get_library
    import censuslib.dataframe
    import pandas as pd

    # The district NCES codes aren't in the district file, although they are in the school file.
    schools = self.bundle.partition(table='schools').analysis.dataframe()
    schools['cd_code'] = schools.cdscode.apply(lambda cdscode: cdscode[:7])
    nces_districts = schools[schools.statustype == 'Active'][['ncesdist', 'cd_code']].drop_duplicates()

    # The actual districts file
    ca_districts = self.bundle.partition(table='districts').analysis.dataframe()[
        ['cd_code', 'county_sos', 'county_fips', 'county_gvid', 'county', 'district']]

    # assert len(nces_districts) == len(ca_districts)

    # Combine the codes from the school file with the district file
    cd_code_districts = ca_districts.set_index('cd_code').join(nces_districts.set_index('cd_code')).reset_index()

    ##
    ## Join the California state districts list with the Census districts list
    ##

    from geoid.acs import AcsGeoid
    from geoid.civick import GVid

    dist_pred = lambda row: row.state == 6

    def mk_cd_code(nces):
        return '06{:05d}'.format(nces)

    # Combine the three partitions for school districts in the census, and extract the NCES code
    elem = self.bundle.dep('elementary').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
    elem['nces'] = elem.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sdelm))

    second = self.bundle.dep('secondary').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
    second['nces'] = second.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sdsec))

    unified = self.bundle.dep('unified').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
    unified['nces'] = unified.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sduni))

    # Add a GVID
    census_districts = pd.concat([elem, second, unified], axis=0)
    census_districts['gvid'] = census_districts.geoid.apply(lambda geoid: AcsGeoid.parse(geoid).convert(GVid))

    # Do the join
    districts = cd_code_districts.set_index('ncesdist').join(
        census_districts.drop_duplicates().set_index('nces')).reset_index()
    districts.columns = ['ncesdist'] + list(districts.columns)[1:]

    # These lists are different sizes, and it isn't clear why. The de-duplicated census file is smaller
    # than the list from California, probably because many of the districts are smaller than the
    # reporting limits.
    # NOTE: Because the join is a left join, (a) it must be joined in the order above (cd_code_districts,
    # the larger list, is the base) and (b) the joined 'districts' dataframe will have some missing geoids.
    # The missing geoids appear to be primarily for county offices of education and districts in small counties.
    # >>> print len(census_districts), len(census_districts.drop_duplicates()), len(cd_code_districts), len(districts)
    # >>> 1976 988 1098 1098
    districts.gvid.fillna(value='', inplace=True)
    districts.geoid.fillna(value='', inplace=True)
    districts.name.fillna(value='', inplace=True)
    districts.ncesdist.fillna(value=0, inplace=True)

    df = districts.reset_index()

    yield ['id'] + list(df.columns)

    for index, row in df.iterrows():
        yield [index] + list(row)
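# Usage sketch (added for illustration, not part of the original source). The generator yields a
# header row followed by data rows, so it can be materialized directly into a DataFrame;
# `districts_source` is a hypothetical instance of the class defining __iter__ above.
import pandas as pd

rows = list(districts_source)
df = pd.DataFrame(rows[1:], columns=rows[0])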