def __init__(self, year, release, stusab, summary_level, tableid):
    """Set up a reader for one table and collect its sequences.

    :param year: Passed through to the base initializer and to ``tablemeta``.
    :param release: Passed through to the base initializer and to
        ``tablemeta``.
    :param stusab: State abbreviation, passed to the base initializer. When
        ``self.stusab`` (read back after the base initializer has run) is
        ``'US'`` in any letter case, every state in
        ``geoid.censusnames.stusab`` is used instead.
    :param summary_level: Passed through to the base initializer.
    :param tableid: Table id; surrounding whitespace is stripped and the
        value is lower-cased before the metadata lookup.
    :raises SourceError: If the normalized table id is not in the table
        metadata. The message mentions the sibling 'b'/'c' table when that
        one exists.
    """
    # HACK! This initializer will download files. It should not.
    from geoid.censusnames import stusab as state_name_map

    super().__init__(year, release, stusab, summary_level, seq=None)

    self.meta = tablemeta(year, release)
    self.tableid = tableid.strip().lower()

    try:
        self.table = self.meta.tables[self.tableid]
    except KeyError as e:
        # The same table number can exist under the other leading letter;
        # point the caller at it when that is the case.
        alt_c = 'c' + self.tableid[1:]
        alt_b = 'b' + self.tableid[1:]

        if self.tableid.startswith('b') and alt_c in self.meta.tables:
            other_msg = f" However, table '{alt_c}' exists"
        elif self.tableid.startswith('c') and alt_b in self.meta.tables:
            other_msg = f" However, table '{alt_b}' exists"
        else:
            other_msg = ''

        # Chain explicitly so the traceback shows the KeyError as the cause
        # rather than as an error raised while handling another error.
        raise SourceError(
            f"Table metadata does not include table '{self.tableid}' "
            + other_msg) from e

    if self.stusab.upper() == 'US':
        self.state_abs = list(state_name_map.values())
    else:
        self.state_abs = [self.stusab]

    # Initialised empty here, before _collect_sequences() is called.
    self.item_getters = []
    self.lr_pos = None

    self._collect_sequences()
def build_block_maps(pkg):
    """Build the per-state block maps and write the combined CSV files.

    The 'utm_grid' and 'cbsa' references of ``pkg`` are loaded as geoframes
    (the CBSA frame is converted to CRS 4326) and stored in the cache. One
    ``_f_block_maps`` task per state is then run through ``run_mp``. Each
    task result is indexed as ``(cbsa_cache_key, utm_cache_key)``; the cached
    frames are concatenated and written to ``data/cbsa_map.csv`` and
    ``data/utm_map.csv`` next to the package file.

    :param pkg: Package object providing ``reference()`` and ``path``.
    """
    cache = get_cache(pkg)
    states = list(stusab.values())

    grid_key = 'blocks/map/source/utm'
    cache.put(grid_key, pkg.reference('utm_grid').geoframe())

    cbsa_key = 'blocks/map/source/cbsa'
    cache.put(cbsa_key, pkg.reference('cbsa').geoframe().to_crs(4326))

    tasks = [(cache, st, grid_key, cbsa_key) for st in states]

    # appnope is optional (presumably used to stop macOS App Nap from
    # throttling the workers -- confirm). Only the import is guarded: an
    # ImportError raised inside the multiprocessing run must propagate
    # instead of silently re-running every task.
    try:
        import appnope
    except ImportError:
        appnope = None

    if appnope is None:
        r = run_mp(_f_block_maps, tasks)
    else:
        with appnope.nope_scope():
            r = run_mp(_f_block_maps, tasks)

    cbsa_map = pd.concat([cache.get(e[0]) for e in r])
    utm_map = pd.concat([cache.get(e[1]) for e in r])

    data_dir = Path(pkg.path).parent.joinpath('data')

    cbsa_map.to_csv(data_dir.joinpath('cbsa_map.csv'), index=False)
    utm_map.to_csv(data_dir.joinpath('utm_map.csv'), index=False)
def __init__(self, year, release, stusab, summary_level, tableid):
    """Set up a reader for one table, using its sequence file for layout.

    :param year: Passed through to the base initializer, ``tablemeta`` and
        ``SequenceFile``.
    :param release: Passed through likewise.
    :param stusab: State abbreviation, passed to the base initializer. When
        ``self.stusab`` (read back after the base initializer has run) is
        ``'US'`` in any letter case, every state in
        ``geoid.censusnames.stusab`` is used instead.
    :param summary_level: Passed through to the base initializer.
    :param tableid: Table id; surrounding whitespace is stripped and the
        value is lower-cased before use.
    :raises SourceError: If the normalized table id is not in the table
        metadata. The message mentions the sibling 'b'/'c' table when that
        one exists.
    """
    from geoid.censusnames import stusab as state_name_map

    super().__init__(year, release, stusab, summary_level, seq=None)

    self.meta = tablemeta(year, release)
    self.tableid = tableid.strip().lower()

    try:
        self.table = self.meta.tables[self.tableid]
    except KeyError as e:
        # The same table number can exist under the other leading letter;
        # point the caller at it when that is the case.
        alt_c = 'c' + self.tableid[1:]
        alt_b = 'b' + self.tableid[1:]

        if self.tableid.startswith('b') and alt_c in self.meta.tables:
            other_msg = f" However, table '{alt_c}' exists"
        elif self.tableid.startswith('c') and alt_b in self.meta.tables:
            other_msg = f" However, table '{alt_b}' exists"
        else:
            other_msg = ''

        # Chain explicitly so the traceback shows the KeyError as the cause.
        raise SourceError(
            f"Table metadata does not include table '{self.tableid}' "
            + other_msg) from e

    self.seq = int(self.table.seq)

    if self.stusab.upper() == 'US':
        self.state_abs = list(state_name_map.values())
    else:
        self.state_abs = [self.stusab]

    # First sequence file; its metadata and headers describe the layout.
    sequence_file = SequenceFile(self.year, self.release, self.state_abs[0],
                                 self.summary_level, self.seq)

    # Get the column names that we will be extracting from the segment.
    # Compare against the normalized id (self.tableid) so that an id passed
    # with surrounding whitespace matches here exactly as it did in the
    # metadata lookup above.
    self._columns = [
        c for c in sequence_file.meta
        if c.table_id and c.table_id.lower() == self.tableid
    ]

    self.lr_pos = sequence_file.file_headers.index('LOGRECNO')

    self.col_positions = [c.seq_file_col_no for c in self._columns]

    # NOTE(review): itemgetter with exactly one index returns a bare item
    # rather than a tuple -- confirm the concatenations below behave as
    # intended for single-column tables.
    self.ig = itemgetter(*self.col_positions)

    geo = self.geo()

    # The first 'LOGRECNO' entry from geo() is prefixed to the selected
    # sequence-file headers and descriptions.
    self.file_headers = geo['LOGRECNO'][0] + self.ig(
        sequence_file.file_headers)
    self.descriptions = geo['LOGRECNO'][0] + self.ig(
        sequence_file.descriptions)
def split_blocks(pkg):
    """Download block files and cache them.

    One ``_f_get_split_blocks`` task is built per state in ``stusab``; each
    task carries the state, the package cache, and the 'block_templ'
    reference URL formatted for that state.

    :param pkg: Package object providing ``reference()``.
    :return: The result of ``run_mp`` over all tasks.
    """
    cache = get_cache(pkg)

    tasks = []
    for st in list(stusab.values()):
        block_url = pkg.reference('block_templ').url.format(st=st)
        tasks.append((st, cache, block_url))

    # 4 cpu b/c we're downloading
    return run_mp(_f_get_split_blocks, tasks, n_cpu=4)
def join_blocks(pkg, break_starts):
    """Join census blocks and OSM points.

    One ``_f_join_blocks`` task is run for every (break start, state)
    combination, each with the package cache appended as the last element.

    :param pkg: Package object used to obtain the cache.
    :param break_starts: Iterable of break-start values, combined with every
        state in ``stusab``.
    :return: List of the task results that are not ``Exception`` instances.
        Results that are exceptions are logged as warnings and left out.
    """
    import logging

    cache = get_cache(pkg)
    states = list(stusab.values())

    tasks = [e + (cache,) for e in product(break_starts, states)]

    keys = run_mp(_f_join_blocks, tasks)

    # Split successes from failures in a single pass.
    joins, failures = [], []
    for result in keys:
        if isinstance(result, Exception):
            failures.append(result)
        else:
            joins.append(result)

    # Failures used to be collected and then dropped silently; report them
    # so a partial join is visible, while still returning the successes.
    if failures:
        log = logging.getLogger(__name__)
        log.warning("%d of %d block join tasks failed",
                    len(failures), len(failures) + len(joins))
        for exc in failures:
            log.warning("Block join task failed: %r", exc)

    return joins
def tracts(self):
    """Return the tracts frame, building and caching it on first use.

    On the first call, one geoframe per state in ``stusab`` is loaded from
    the 'us_tracts_template' reference URL, concatenated and converted to
    CRS 4326. Derived columns are added and a fixed column subset is stored
    in ``self._tracts``. Later calls return the stored frame.
    """
    if self._tracts is not None:
        return self._tracts

    logger.info("Building tracts")

    template = self.pkg.reference('us_tracts_template').url

    per_state = []
    for st in tqdm(stusab.values()):
        per_state.append(rg.geoframe(template.format(st=st)))

    gdf = pd.concat(per_state).to_crs(4326)

    # Mark the tracts in the continential US: 1 when the tract's statefp
    # appears in self.states, else 0.
    known_states = self.states.statefp.unique()
    gdf['continential'] = gdf.statefp.isin(known_states).astype(int)

    # Sequential id taken from a freshly reset index.
    gdf['tract_id'] = gdf.reset_index().index

    # TODO: an accurate area needs a per-UTM-zone conversion (group by
    # 'utm_epsg', to_crs each group, take .area); that is not computed here.

    lat_lon = gdf[['intptlat', 'intptlon']].astype(float)
    gdf['geohash'] = lat_lon.apply(
        lambda row: gh.encode(row.intptlat, row.intptlon), axis=1)

    # First four characters of the geohash.
    gdf['gh4'] = gdf.geohash.str.slice(0, 4)

    keep = [
        'geoid', 'tract_id', 'geohash', 'statefp', 'intptlat', 'intptlon',
        'geometry', 'aland', 'awater', 'gh4', 'continential'
    ]
    self._tracts = gdf[keep]

    return self._tracts
from .files.metafiles import TableMeta
from .appurl import CensusUrl
from .files.appurl import CensusFileUrl, CensusGeoUrl
from .censusreporter.url import CensusReporterUrl, CensusReporterShapeURL

# Change here if project is renamed and does not equal the package name
dist_name = 'publicdata-census'


def _installed_version(name):
    """Return the installed version of distribution *name*, or 'unknown'.

    importlib.metadata is preferred. pkg_resources is deprecated and is only
    present when setuptools is installed, so it is used purely as a fallback
    for interpreters that lack importlib.metadata. A missing lookup mechanism
    or a missing distribution both yield 'unknown' instead of making the
    package unimportable.
    """
    try:
        from importlib.metadata import PackageNotFoundError, version
    except ImportError:  # Python < 3.8
        try:
            from pkg_resources import DistributionNotFound, get_distribution
        except ImportError:
            return 'unknown'
        try:
            return get_distribution(name).version
        except DistributionNotFound:
            return 'unknown'

    try:
        return version(name)
    except PackageNotFoundError:
        return 'unknown'


__version__ = _installed_version(dist_name)

from geoid.censusnames import stusab

# Sorted state abbreviations from stusab, excluding DC, PR, AK and HI.
# (The spelling of the name is kept as-is for compatibility with importers.)
continential_states = sorted(set(stusab.values()) - {'DC', 'PR', 'AK', 'HI'})


def census_table(table, state, sl='state', year=2018, release=5):
    """Return a dataframe for one census table.

    The frame is loaded through rowgenerators from the URL
    ``census://{year}/{release}/{state}/{sl}/{table}``.

    :param table: Table id placed last in the URL.
    :param state: State component of the URL.
    :param sl: Summary level component of the URL; defaults to 'state'.
    :param year: Year component of the URL; defaults to 2018.
    :param release: Release component of the URL; defaults to 5.
    :return: The result of ``rowgenerators.dataframe`` for that URL.
    """
    import rowgenerators as rg

    return rg.dataframe(f'census://{year}/{release}/{state}/{sl}/{table}')


def census_geo(state, sl='state', year=2018, release=5):
    """Return a geoframe for one state and summary level.

    The frame is loaded through rowgenerators from the URL
    ``censusgeo://{year}/{release}/{state}/{sl}``.

    :param state: State component of the URL.
    :param sl: Summary level component of the URL; defaults to 'state'.
    :param year: Year component of the URL; defaults to 2018.
    :param release: Release component of the URL; defaults to 5.
    :return: The result of ``rowgenerators.geoframe`` for that URL.
    """
    import rowgenerators as rg

    return rg.geoframe(f'censusgeo://{year}/{release}/{state}/{sl}')