Exemplo n.º 1
0
def test_tag(loader):
    # Tag parcels with block group ID.
    parcels = loader.tables.sample.heather_farms
    bg = loader.tables.sample.hf_bg
    assert not hasattr(parcels, 'bg_id')
    spatialtoolz.tag(parcels, 'bg_id', bg, 'objectid')
    assert hasattr(parcels, 'bg_id')

    # Build DataFrame from parcels and block groups tables.
    parcels_df = db_to_df(parcels, index_col='parcel_id')
    bg_df = db_to_df(bg, index_col='objectid')

    # Assert that all parcels have integer block groups.
    assert not parcels_df.bg_id.isnull().any()
    assert np.issubdtype(parcels_df.bg_id.dtype, int)

    # Assert that there are at least 10 unique parcel block groups.
    parcels_bg_ids = parcels_df.bg_id.unique()
    assert len(parcels_bg_ids) >= 10

    # Assert that parcel block groups are a subset of all block groups.
    assert np.all([bg_id in bg_df.index for bg_id in parcels_bg_ids])
Exemplo n.º 2
0
def test_tag(loader):
    # Tag parcels with block group ID.
    parcels = loader.tables.sample.heather_farms
    bg = loader.tables.sample.hf_bg
    assert not hasattr(parcels, 'bg_id')
    spatialtoolz.tag(parcels, 'bg_id', bg, 'objectid')
    assert hasattr(parcels, 'bg_id')

    # Build DataFrame from parcels and block groups tables.
    parcels_df = db_to_df(parcels, index_col='parcel_id')
    bg_df = db_to_df(bg, index_col='objectid')

    # Assert that all parcels have integer block groups.
    assert not parcels_df.bg_id.isnull().any()
    assert np.issubdtype(parcels_df.bg_id.dtype, int)

    # Assert that there are at least 10 unique parcel block groups.
    parcels_bg_ids = parcels_df.bg_id.unique()
    assert len(parcels_bg_ids) >= 10

    # Assert that parcel block groups are a subset of all block groups.
    assert np.all([bg_id in bg_df.index for bg_id in parcels_bg_ids])
Exemplo n.º 3
0
from spandex import TableLoader
from spandex.spatialtoolz import tag
from spandex.io import exec_sql
import pandas.io.sql as sql

###Database connection
loader = TableLoader()
t = loader.tables

###Tagging parcels with taz id based on point in poly
tag(t.public.parcels, 'taz', t.staging.taz, 'taz_key')


###Deal with special cases where parcels do not fall within TAZ boundaries
def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)


exec_sql("""
CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision) RETURNS integer AS $BODY$ BEGIN FOR i IN 0..maxslice LOOP IF st_expand(a,maxe*i/maxslice) && b THEN RETURN i; END IF; END LOOP; RETURN 99999999; END; $BODY$ LANGUAGE plpgsql IMMUTABLE COST 100; ALTER FUNCTION expandoverlap_metric(geometry, geometry, double precision, double precision) OWNER TO postgres
""")

if db_to_df("select exists (select 1 from pg_type where typname = 'pgis_nn');"
            ).values[0][0] == False:
    exec_sql("CREATE TYPE pgis_nn AS (nn_gid integer, nn_dist numeric(16,5))")

exec_sql("""
CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset character varying, swhere character varying, sgid2field character varying, sgeom2field character varying) RETURNS SETOF pgis_nn AS $BODY$ DECLARE strsql text; rec pgis_nn; ncollected integer; it integer; BEGIN ncollected := 0; it := 0; WHILE ncollected < numnn AND it <= maxslices LOOP strsql := 'SELECT currentit.' || sgid2field || ', st_distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || '  as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND st_distance(ref.geom, currentit.' || sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND st_expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) ||  ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY st_distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' ||  CAST((numnn - ncollected) As varchar(200)); FOR rec in EXECUTE (strsql) LOOP IF ncollected < numnn THEN ncollected := ncollected + 1; RETURN NEXT rec; ELSE EXIT; END IF; END LOOP; it := it + 1; END LOOP; END $BODY$ LANGUAGE plpgsql STABLE COST 100 ROWS 1000; ALTER FUNCTION _pgis_fn_nn(geometry, double precision, integer, integer, character varying, character varying, character varying, character varying) OWNER TO postgres
""")
Exemplo n.º 4
0
            sql_str = sql_str + ' UNION ALL '

sql_str = 'with a as (' + sql_str + ') select * into staging.puma00 from a'
exec_sql(sql_str)

exec_sql('ALTER TABLE staging.puma00 ADD COLUMN gid BIGSERIAL PRIMARY KEY')

exec_sql("""
CREATE INDEX puma00_gist ON staging.puma00
  USING gist (geom);
""")

loader.database.refresh()

# Tag tracts with a parcel_id
tag(loader.tables.staging.tracts10, 'puma00_id', loader.tables.staging.puma00,
    'puma5')

tract10_puma10_rel_file = urllib.URLopener()
tract10_puma10_rel_file.retrieve(
    "http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt",
    os.path.join(loader.get_path('tract2010_geom'),
                 'tract10_puma10_rel_file.csv'))

tract10_puma10_rel = pd.read_csv(os.path.join(
    loader.get_path('tract2010_geom'), 'tract10_puma10_rel_file.csv'),
                                 dtype={
                                     "STATEFP": "object",
                                     "COUNTYFP": "object",
                                     "TRACTCE": "object",
                                     "PUMA5CE": "object"
                                 })
Exemplo n.º 5
0
from spandex import TableLoader
from spandex.spatialtoolz import tag
from spandex.io import exec_sql
import pandas.io.sql as sql

###Database connection
loader = TableLoader()
t = loader.tables

###Tagging parcels with taz id based on point in poly
tag(t.public.parcels, 'taz', t.staging.taz, 'taz_key')


###Deal with special cases where parcels do not fall within TAZ boundaries
def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)
    
exec_sql("""
CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision) RETURNS integer AS $BODY$ BEGIN FOR i IN 0..maxslice LOOP IF st_expand(a,maxe*i/maxslice) && b THEN RETURN i; END IF; END LOOP; RETURN 99999999; END; $BODY$ LANGUAGE plpgsql IMMUTABLE COST 100; ALTER FUNCTION expandoverlap_metric(geometry, geometry, double precision, double precision) OWNER TO postgres
""")

if db_to_df("select exists (select 1 from pg_type where typname = 'pgis_nn');").values[0][0] == False:
    exec_sql("CREATE TYPE pgis_nn AS (nn_gid integer, nn_dist numeric(16,5))")
    
exec_sql("""
CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset character varying, swhere character varying, sgid2field character varying, sgeom2field character varying) RETURNS SETOF pgis_nn AS $BODY$ DECLARE strsql text; rec pgis_nn; ncollected integer; it integer; BEGIN ncollected := 0; it := 0; WHILE ncollected < numnn AND it <= maxslices LOOP strsql := 'SELECT currentit.' || sgid2field || ', st_distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || '  as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND st_distance(ref.geom, currentit.' || sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND st_expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) ||  ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY st_distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' ||  CAST((numnn - ncollected) As varchar(200)); FOR rec in EXECUTE (strsql) LOOP IF ncollected < numnn THEN ncollected := ncollected + 1; RETURN NEXT rec; ELSE EXIT; END IF; END LOOP; it := it + 1; END LOOP; END $BODY$ LANGUAGE plpgsql STABLE COST 100 ROWS 1000; ALTER FUNCTION _pgis_fn_nn(geometry, double precision, integer, integer, character varying, character varying, character varying, character varying) OWNER TO postgres
""")

exec_sql("""
Exemplo n.º 6
0
            sql_str = sql_str + ' UNION ALL '
        
sql_str = 'with a as (' + sql_str + ') select * into staging.puma00 from a'
exec_sql(sql_str)

exec_sql('ALTER TABLE staging.puma00 ADD COLUMN gid BIGSERIAL PRIMARY KEY')

exec_sql("""
CREATE INDEX puma00_gist ON staging.puma00
  USING gist (geom);
""")

loader.database.refresh()

# Tag tracts with a parcel_id
tag(loader.tables.staging.tracts10, 'puma00_id', loader.tables.staging.puma00, 'puma5')

tract10_puma10_rel_file = urllib.URLopener()
tract10_puma10_rel_file.retrieve("http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt", 
                  os.path.join(loader.get_path('tract2010_geom'), 'tract10_puma10_rel_file.csv'))

tract10_puma10_rel = pd.read_csv(os.path.join(loader.get_path('tract2010_geom'), 'tract10_puma10_rel_file.csv'), 
                                 dtype={
                                            "STATEFP": "object",
                                            "COUNTYFP": "object",
                                            "TRACTCE": "object",
                                            "PUMA5CE": "object"
                                        })
tract10_puma00 = db_to_df('select geoid10, namelsad10, puma00_id from staging.tracts10;')

##Need statefp/countyfp/tractce columns on tracts (split from geoid)
Exemplo n.º 7
0
loader.database.refresh()

t = loader.tables
staging = loader.tables.staging


# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)


# Fix invalid geometries and reproject.
conform_srids(loader.srid, schema=staging, fix=True)


# Tag parcels with a block_id
tag(t.staging.parcels, 'block_geoid', t.staging.blocks, 'geoid10')
# Tag sitespec with a parcel_id
tag(t.staging.sitespec, 'parcel_id', t.staging.parcels, 'parcel_id')


# Load csv's
csvs = {
    'local_effect_distances':
    'space/local_effect_distances.csv',

    'sqft_per_job_by_devtype':
    'employment/sqftPerEmpByDevType.csv',

    'sqft_per_job_by_activity_by_devtype':
    'employment/sqftPerEmpByActivityByDevType.csv',