def test_tag(loader):
    """Tag each parcel with the ID of its containing block group and verify.

    Args:
        loader: spandex TableLoader fixture exposing the ``sample`` schema
            (``heather_farms`` parcels and ``hf_bg`` block groups).
    """
    # Tag parcels with block group ID.
    parcels = loader.tables.sample.heather_farms
    bg = loader.tables.sample.hf_bg
    assert not hasattr(parcels, 'bg_id')
    spatialtoolz.tag(parcels, 'bg_id', bg, 'objectid')
    assert hasattr(parcels, 'bg_id')

    # Build DataFrame from parcels and block groups tables.
    parcels_df = db_to_df(parcels, index_col='parcel_id')
    bg_df = db_to_df(bg, index_col='objectid')

    # Assert that all parcels have integer block groups.
    assert not parcels_df.bg_id.isnull().any()
    # np.integer matches any integer dtype (int32, int64, uint...), whereas
    # plain `int` only maps to the platform default integer type.
    assert np.issubdtype(parcels_df.bg_id.dtype, np.integer)

    # Assert that there are at least 10 unique parcel block groups.
    parcels_bg_ids = parcels_df.bg_id.unique()
    assert len(parcels_bg_ids) >= 10

    # Assert that parcel block groups are a subset of all block groups.
    assert np.all([bg_id in bg_df.index for bg_id in parcels_bg_ids])
def test_tag(loader):
    """Tag each parcel with the ID of its containing block group and verify.

    Args:
        loader: spandex TableLoader fixture exposing the ``sample`` schema
            (``heather_farms`` parcels and ``hf_bg`` block groups).
    """
    # Tag parcels with block group ID.
    parcels = loader.tables.sample.heather_farms
    bg = loader.tables.sample.hf_bg
    assert not hasattr(parcels, 'bg_id')
    spatialtoolz.tag(parcels, 'bg_id', bg, 'objectid')
    assert hasattr(parcels, 'bg_id')

    # Build DataFrame from parcels and block groups tables.
    parcels_df = db_to_df(parcels, index_col='parcel_id')
    bg_df = db_to_df(bg, index_col='objectid')

    # Assert that all parcels have integer block groups.
    assert not parcels_df.bg_id.isnull().any()
    # np.integer matches any integer dtype (int32, int64, uint...), whereas
    # plain `int` only maps to the platform default integer type.
    assert np.issubdtype(parcels_df.bg_id.dtype, np.integer)

    # Assert that there are at least 10 unique parcel block groups.
    parcels_bg_ids = parcels_df.bg_id.unique()
    assert len(parcels_bg_ids) >= 10

    # Assert that parcel block groups are a subset of all block groups.
    assert np.all([bg_id in bg_df.index for bg_id in parcels_bg_ids])
from spandex import TableLoader
from spandex.spatialtoolz import tag
from spandex.io import exec_sql
import pandas.io.sql as sql


# Database connection.
loader = TableLoader()
t = loader.tables

# Tag parcels with TAZ id based on point-in-polygon containment.
tag(t.public.parcels, 'taz', t.staging.taz, 'taz_key')


# Deal with special cases where parcels do not fall within TAZ boundaries.

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)


# Register a helper that reports the first expansion slice i (0..maxslice)
# at which st_expand(a, maxe*i/maxslice) overlaps b; 99999999 means "never
# within maxe".
exec_sql("""
CREATE OR REPLACE FUNCTION expandoverlap_metric(
    a geometry, b geometry, maxe double precision, maxslice double precision)
RETURNS integer AS
$BODY$
BEGIN
    FOR i IN 0..maxslice LOOP
        IF st_expand(a, maxe*i/maxslice) && b THEN
            RETURN i;
        END IF;
    END LOOP;
    RETURN 99999999;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE COST 100;
ALTER FUNCTION expandoverlap_metric(
    geometry, geometry, double precision, double precision)
OWNER TO postgres
""")

# Create the composite return type for the nearest-neighbor function, but
# only if it does not already exist (plain CREATE TYPE would error on rerun).
pgis_nn_exists = db_to_df(
    "select exists (select 1 from pg_type where typname = 'pgis_nn');"
).values[0][0]
if not pgis_nn_exists:  # idiomatic truthiness test instead of `== False`
    exec_sql("CREATE TYPE pgis_nn AS (nn_gid integer, nn_dist numeric(16,5))")

# Nearest-neighbor search: for a reference geometry, return up to numnn
# (gid, distance) pairs from lookupset, scanning outward in up to maxslices
# expanding distance bands bounded by distguess.
exec_sql("""
CREATE OR REPLACE FUNCTION _pgis_fn_nn(
    geom1 geometry, distguess double precision, numnn integer,
    maxslices integer, lookupset character varying, swhere character varying,
    sgid2field character varying, sgeom2field character varying)
RETURNS SETOF pgis_nn AS
$BODY$
DECLARE
    strsql text;
    rec pgis_nn;
    ncollected integer;
    it integer;
BEGIN
    ncollected := 0;
    it := 0;
    WHILE ncollected < numnn AND it <= maxslices LOOP
        strsql := 'SELECT currentit.' || sgid2field
            || ', st_distance(ref.geom, currentit.' || sgeom2field
            || ') as dist FROM ' || lookupset
            || ' as currentit, (SELECT geometry(''' || CAST(geom1 As text)
            || ''') As geom) As ref WHERE ' || swhere
            || ' AND st_distance(ref.geom, currentit.' || sgeom2field
            || ') <= ' || CAST(distguess As varchar(200))
            || ' AND st_expand(ref.geom, '
            || CAST(distguess*it/maxslices As varchar(100))
            || ') && currentit.' || sgeom2field
            || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field
            || ', ' || CAST(distguess As varchar(200))
            || ', ' || CAST(maxslices As varchar(200))
            || ') = ' || CAST(it As varchar(100))
            || ' ORDER BY st_distance(ref.geom, currentit.' || sgeom2field
            || ') LIMIT ' || CAST((numnn - ncollected) As varchar(200));
        FOR rec in EXECUTE (strsql) LOOP
            IF ncollected < numnn THEN
                ncollected := ncollected + 1;
                RETURN NEXT rec;
            ELSE
                EXIT;
            END IF;
        END LOOP;
        it := it + 1;
    END LOOP;
END
$BODY$
LANGUAGE plpgsql STABLE COST 100 ROWS 1000;
ALTER FUNCTION _pgis_fn_nn(
    geometry, double precision, integer, integer, character varying,
    character varying, character varying, character varying)
OWNER TO postgres
""")
# NOTE(review): `sql_str` is accumulated before this chunk (outside view),
# presumably one SELECT per source table unioned together — confirm upstream.
sql_str = sql_str + ' UNION ALL '
# Wrap the unioned selects in a CTE and materialize them as staging.puma00.
sql_str = 'with a as (' + sql_str + ') select * into staging.puma00 from a'
exec_sql(sql_str)

# Add a surrogate primary key and a spatial index to the new table.
exec_sql('ALTER TABLE staging.puma00 ADD COLUMN gid BIGSERIAL PRIMARY KEY')
exec_sql(""" CREATE INDEX puma00_gist ON staging.puma00 USING gist (geom); """)
loader.database.refresh()

# Tag tracts with the id of the containing PUMA00 geography.
tag(loader.tables.staging.tracts10, 'puma00_id',
    loader.tables.staging.puma00, 'puma5')

# Download the Census tract10 -> PUMA10 relationship file.
# NOTE(review): urllib.URLopener is Python 2 API (removed in Python 3);
# consider urllib.request.urlretrieve if this script ever migrates.
tract10_puma10_rel_file = urllib.URLopener()
tract10_puma10_rel_file.retrieve(
    "http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt",
    os.path.join(loader.get_path('tract2010_geom'),
                 'tract10_puma10_rel_file.csv'))

# Read FIPS code columns as strings so leading zeros are preserved.
tract10_puma10_rel = pd.read_csv(
    os.path.join(loader.get_path('tract2010_geom'),
                 'tract10_puma10_rel_file.csv'),
    dtype={"STATEFP": "object", "COUNTYFP": "object",
           "TRACTCE": "object", "PUMA5CE": "object"})
from spandex import TableLoader from spandex.spatialtoolz import tag from spandex.io import exec_sql import pandas.io.sql as sql ###Database connection loader = TableLoader() t = loader.tables ###Tagging parcels with taz id based on point in poly tag(t.public.parcels, 'taz', t.staging.taz, 'taz_key') ###Deal with special cases where parcels do not fall within TAZ boundaries def db_to_df(query): """Executes SQL query and returns DataFrame.""" conn = loader.database._connection return sql.read_frame(query, conn) exec_sql(""" CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision) RETURNS integer AS $BODY$ BEGIN FOR i IN 0..maxslice LOOP IF st_expand(a,maxe*i/maxslice) && b THEN RETURN i; END IF; END LOOP; RETURN 99999999; END; $BODY$ LANGUAGE plpgsql IMMUTABLE COST 100; ALTER FUNCTION expandoverlap_metric(geometry, geometry, double precision, double precision) OWNER TO postgres """) if db_to_df("select exists (select 1 from pg_type where typname = 'pgis_nn');").values[0][0] == False: exec_sql("CREATE TYPE pgis_nn AS (nn_gid integer, nn_dist numeric(16,5))") exec_sql(""" CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset character varying, swhere character varying, sgid2field character varying, sgeom2field character varying) RETURNS SETOF pgis_nn AS $BODY$ DECLARE strsql text; rec pgis_nn; ncollected integer; it integer; BEGIN ncollected := 0; it := 0; WHILE ncollected < numnn AND it <= maxslices LOOP strsql := 'SELECT currentit.' || sgid2field || ', st_distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || ' as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND st_distance(ref.geom, currentit.' 
|| sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND st_expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) || ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY st_distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' || CAST((numnn - ncollected) As varchar(200)); FOR rec in EXECUTE (strsql) LOOP IF ncollected < numnn THEN ncollected := ncollected + 1; RETURN NEXT rec; ELSE EXIT; END IF; END LOOP; it := it + 1; END LOOP; END $BODY$ LANGUAGE plpgsql STABLE COST 100 ROWS 1000; ALTER FUNCTION _pgis_fn_nn(geometry, double precision, integer, integer, character varying, character varying, character varying, character varying) OWNER TO postgres """) exec_sql("""
# NOTE(review): `sql_str` is accumulated before this chunk (outside view),
# presumably one SELECT per source table unioned together — confirm upstream.
sql_str = sql_str + ' UNION ALL '
# Wrap the unioned selects in a CTE and materialize them as staging.puma00.
sql_str = 'with a as (' + sql_str + ') select * into staging.puma00 from a'
exec_sql(sql_str)

# Add a surrogate primary key and a spatial index to the new table.
exec_sql('ALTER TABLE staging.puma00 ADD COLUMN gid BIGSERIAL PRIMARY KEY')
exec_sql(""" CREATE INDEX puma00_gist ON staging.puma00 USING gist (geom); """)
loader.database.refresh()

# Tag tracts with the id of the containing PUMA00 geography.
tag(loader.tables.staging.tracts10, 'puma00_id',
    loader.tables.staging.puma00, 'puma5')

# Download the Census tract10 -> PUMA10 relationship file.
# NOTE(review): urllib.URLopener is Python 2 API (removed in Python 3);
# consider urllib.request.urlretrieve if this script ever migrates.
tract10_puma10_rel_file = urllib.URLopener()
tract10_puma10_rel_file.retrieve(
    "http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt",
    os.path.join(loader.get_path('tract2010_geom'),
                 'tract10_puma10_rel_file.csv'))

# Read FIPS code columns as strings so leading zeros are preserved.
tract10_puma10_rel = pd.read_csv(
    os.path.join(loader.get_path('tract2010_geom'),
                 'tract10_puma10_rel_file.csv'),
    dtype={"STATEFP": "object", "COUNTYFP": "object",
           "TRACTCE": "object", "PUMA5CE": "object"})

# Pull the tagged tracts back out of the database.
tract10_puma00 = db_to_df(
    'select geoid10, namelsad10, puma00_id from staging.tracts10;')

# Need statefp/countyfp/tractce columns on tracts (split from geoid).
# Refresh the table metadata cache after earlier schema changes.
loader.database.refresh()
t = loader.tables
staging = loader.tables.staging

# Load shapefiles specified above to the project database.
# NOTE(review): `shapefiles` and `conform_srids` are defined earlier in the
# file (outside this chunk) — confirm their definitions upstream.
loader.load_shp_map(shapefiles)

# Fix invalid geometries and reproject.
conform_srids(loader.srid, schema=staging, fix=True)

# Tag parcels with the geoid of the containing block.
tag(t.staging.parcels, 'block_geoid', t.staging.blocks, 'geoid10')

# Tag sitespec with a parcel_id
tag(t.staging.sitespec, 'parcel_id', t.staging.parcels, 'parcel_id')

# Load csv's: map of table name -> path relative to the data directory.
# (Dict literal continues past this chunk of the file.)
csvs = {
    'local_effect_distances': 'space/local_effect_distances.csv',
    'sqft_per_job_by_devtype': 'employment/sqftPerEmpByDevType.csv',
    'sqft_per_job_by_activity_by_devtype': 'employment/sqftPerEmpByActivityByDevType.csv',