def test_random_state():
    # no seeds
    reno = Community.from_census(msa_fips="39900", datastore=DataStore())
    r1 = reno.cluster(columns=columns, method="kmeans", n_clusters=5)
    r2 = reno.cluster(columns=columns, method="kmeans", n_clusters=5)
    card1 = r1.gdf.groupby("kmeans").count()["geoid"].values
    card1.sort()
    card2 = r2.gdf.groupby("kmeans").count()["geoid"].values
    card2.sort()
    # test that the cardinalities are different
    np.testing.assert_raises(
        AssertionError, np.testing.assert_array_equal, card1, card2
    )

    # seeds
    reno = Community.from_census(msa_fips="39900", datastore=DataStore())
    seed = 10
    r1 = reno.cluster(
        columns=columns, method="kmeans", n_clusters=5, random_state=seed
    )
    r2 = reno.cluster(
        columns=columns, method="kmeans", n_clusters=5, random_state=seed
    )
    card1 = r1.gdf.groupby("kmeans").count()["geoid"].values
    card1.sort()
    card2 = r2.gdf.groupby("kmeans").count()["geoid"].values
    card2.sort()
    # test that the cardinalities are identical
    np.testing.assert_array_equal(card1, card2)
def test_Community_from_gdfs_crs():
    # the constructor should raise when input frames have inconsistent CRS
    t90 = datasets.tracts_1990()
    t00 = datasets.tracts_2000()
    t90 = t90.to_crs(4326)  # WGS84
    t00 = t00.to_crs(3857)  # Web Mercator
    try:
        Community.from_geodataframes([t90, t00])
    except AssertionError:
        print("from_gdfs constructor successfully detects inconsistent crs.")
def Adaptive_Choropleth_Mapper_viz(param):
    write_LOG(param)

    # convert year, variable to years, variables in the param
    if 'years' not in param and 'year' in param:
        param['years'] = [param['year']]
    if 'variables' not in param and 'variable' in param:
        param['variables'] = [param['variable']]

    # select community by state_fips, msa_fips, county_fips
    if 'msa_fips' in param and param['msa_fips']:
        community = Community.from_ltdb(years=param['years'],
                                        msa_fips=param['msa_fips'])
    elif 'county_fips' in param and param['county_fips']:
        community = Community.from_ltdb(years=param['years'],
                                        county_fips=param['county_fips'])
    elif 'state_fips' in param and param['state_fips']:
        community = Community.from_ltdb(years=param['years'],
                                        state_fips=param['state_fips'])

    # map each selected variable to its display label via the codebook
    codebook = pd.read_csv('template/conversion_table_codebook.csv')
    codebook.set_index(keys='variable', inplace=True)
    labels = copy.deepcopy(param['variables'])
    label = 'short_name'  # default
    if param['label'] in ('variable', 'full_name', 'short_name'):
        label = param['label']
    if label != 'variable':
        for idx, variable in enumerate(param['variables']):
            try:
                codeRec = codebook.loc[variable]
                labels[idx] = codeRec[label]
            except KeyError:
                print("variable not found in codebook. variable:", variable)
    param['labels'] = labels

    # write the output bundle and open it in the browser
    write_INDEX_html(param)
    write_GEO_CONFIG_js(param)
    write_GEO_VARIABLES_js(community, param)
    write_GEO_JSON_js(community, param)

    local_dir = os.path.dirname(os.path.realpath(__file__))
    fname = urllib.parse.quote('index.html')
    template_dir = os.path.join(local_dir, 'ACM_' + param['filename_suffix'])
    url = 'file:' + os.path.join(template_dir, fname)
    webbrowser.open(url)
    print('Please open ' + '"ACM_' + param['filename_suffix'] + '/index.html"' +
          ' in your web browser.')
    print('Advanced options are available in ' + '"ACM_' +
          param['filename_suffix'] + '/data/GEO_CONFIG.js"')
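# A minimal usage sketch with hypothetical values; the `param` keys mirror the
# lookups in the function above. Assumes LTDB data have been registered with
# geosnap and that the codebook CSV is present under template/.
param = {
    'year': 2000,                       # promoted to param['years'] internally
    'variable': 'p_poverty_rate',       # promoted to param['variables'] internally
    'msa_fips': '39900',                # Reno, NV metropolitan area
    'label': 'short_name',              # one of: variable, full_name, short_name
    'filename_suffix': 'reno_poverty',  # output folder becomes ACM_reno_poverty
}
Adaptive_Choropleth_Mapper_viz(param)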
def test_harmonize_area():
    la = Community.from_census(county_fips="06037")
    harmonized = la.harmonize(
        2000,
        extensive_variables=["n_total_housing_units"],
        intensive_variables=["p_vacant_housing_units"],
        raster=local_raster,
    )
    assert_allclose(
        harmonized.gdf[harmonized.gdf.year == 2000].n_total_housing_units.sum(),
        3271578.974605,
        atol=600,
    )
    assert_allclose(
        harmonized.gdf[harmonized.gdf.year == 1990].n_total_housing_units.sum(),
        3163560.996240,
    )
    assert_allclose(
        harmonized.gdf[harmonized.gdf.year == 2010].n_total_housing_units.sum(),
        3441415.997327,
    )
    assert_allclose(
        harmonized.gdf.p_vacant_housing_units.sum(), 33011.58879, rtol=1e-03
    )
def test_Community_from_gdfs():
    t90 = datasets.tracts_1990()
    t90 = t90[t90.geoid.str.startswith("11")]
    t00 = datasets.tracts_2000()
    t00 = t00[t00.geoid.str.startswith("11")]
    assert Community.from_geodataframes([t90, t00]).gdf.shape == (380, 192)
def test_harmonize_area_weighted():
    balt = Community.from_census(county_fips="24510")
    harmonized_nlcd_weighted = balt.harmonize(
        2000,
        extensive_variables=["n_total_housing_units"],
        intensive_variables=["p_vacant_housing_units"],
        weights_method="dasymetric",
        raster=local_raster,
    )
    assert harmonized_nlcd_weighted.gdf.n_total_housing_units.sum() == 900620.0
    assert_allclose(
        harmonized_nlcd_weighted.gdf.p_vacant_housing_units.sum(),
        8832.8796,
        rtol=1e-03,
    )
def test_transition():
    """Testing transition modeling."""
    columbus = Community.from_ltdb(msa_fips="18140")
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_edu_college_greater",
        "p_unemployment_rate",
    ]
    columbus1 = columbus.cluster(columns=columns, method="ward")

    # 1. Markov modeling
    m = transition(columbus1.gdf, cluster_col="ward")
    mp = np.array([
        [0.79189189, 0.00540541, 0.0027027, 0.13243243, 0.06216216, 0.00540541],
        [0.0203252, 0.75609756, 0.10569106, 0.11382114, 0.0, 0.00406504],
        [0.00917431, 0.20183486, 0.75229358, 0.01834862, 0.0, 0.01834862],
        [0.1959799, 0.18341709, 0.00251256, 0.61809045, 0.0, 0.0],
        [0.32307692, 0.0, 0.0, 0.0, 0.66153846, 0.01538462],
        [0.09375, 0.0625, 0.0, 0.0, 0.0, 0.84375],
    ])
    np.testing.assert_allclose(m.p, mp, RTOL)

    # 2. Spatial Markov modeling
    np.random.seed(5)
    sm = transition(columbus1.gdf, cluster_col="ward", w_type="queen")
    smp = np.array([
        [0.82413793, 0.0, 0.0, 0.10689655, 0.06896552, 0.0],
        [0.25, 0.5, 0.125, 0.125, 0.0, 0.0],
        [0.5, 0.0, 0.5, 0.0, 0.0, 0.0],
        [0.23809524, 0.0952381, 0.0, 0.66666667, 0.0, 0.0],
        [0.21621622, 0.0, 0.0, 0.0, 0.75675676, 0.02702703],
        [0.16666667, 0.0, 0.0, 0.0, 0.0, 0.83333333],
    ])
    np.testing.assert_allclose(sm.P[0], smp, RTOL)
def test_sequence():
    """Testing sequence modeling."""
    columbus = Community.from_ltdb(msa_fips="18140")
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_edu_college_greater",
        "p_unemployment_rate",
    ]
    columbus1 = columbus.cluster(columns=columns, method="ward")

    # 1. Transition-oriented optimal matching
    output = sequence(
        columbus1.gdf, seq_clusters=5, dist_type="tran", cluster_col="ward"
    )
    values = np.array([3, 3, 0, 2, 3, 1])
    np.testing.assert_allclose(output[1].values[0], values, RTOL)

    # 2. Hamming distance
    output = sequence(
        columbus1.gdf, seq_clusters=5, dist_type="hamming", cluster_col="ward"
    )
    values = np.array([3, 3, 0, 2, 3, 2])
    np.testing.assert_allclose(output[1].values[0], values, RTOL)
def test_Community_from_census():
    assert Community.from_census(state_fips="24").gdf.shape == (3759, 195)
def test_Community_from_boundary():
    msas = datasets.msas()
    reno = msas[msas["geoid"] == "39900"]
    rn = Community.from_census(boundary=reno)
    assert rn.gdf.shape == (234, 195)
def test_Community_from_indices():
    chi = Community.from_ncdb(fips=["17031", "17019"])
    assert chi.gdf.shape == (6797, 78)
def test_Community_from_stcofips():
    mn = Community.from_census(state_fips="27", county_fips=["26001", "26002"])
    assert mn.gdf.shape == (3881, 195)
def test_Community_from_cbsa():
    la = Community.from_census(msa_fips="31080")
    assert la.gdf.shape == (7683, 195)
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal

from geosnap import Community, DataStore

reno = Community.from_census(msa_fips="39900", datastore=DataStore())
columns = [
    "median_household_income",
    "p_poverty_rate",
    "p_unemployment_rate",
]

# Aspatial Clusters


def test_gm():
    r = reno.cluster(columns=columns, method="gaussian_mixture", best_model=True)
    assert len(r.gdf.gaussian_mixture.unique()) >= 5


def test_ward():
    r = reno.cluster(columns=columns, method="ward")
    assert len(r.gdf.ward.unique()) == 7


def test_spectral():
    r = reno.cluster(columns=columns, method="spectral")
    assert len(r.gdf.spectral.unique()) == 7
import numpy as np

from geosnap import Community
from geosnap.analyze import sequence, transition

RTOL = 0.00001

columbus = Community.from_ltdb(msa_fips="18140")
columns = [
    "median_household_income",
    "p_poverty_rate",
    "p_edu_college_greater",
    "p_unemployment_rate",
]
columbus1 = columbus.cluster(columns=columns, method="ward")


def test_transition():
    """Testing transition modeling."""
    # 1. Markov modeling
def Adaptive_Choropleth_Mapper_viz(param):
    # convert year, variable to years, variables in the param
    if 'years' not in param and 'year' in param:
        param['years'] = [param['year']]
    if 'variables' not in param and 'variable' in param:
        param['variables'] = [param['variable']]

    # select community by state_fips, msa_fips, county_fips
    community = None
    if 'msa_fips' in param and param['msa_fips']:
        community = Community.from_ltdb(years=param['years'],
                                        msa_fips=param['msa_fips'])
    elif 'county_fips' in param and param['county_fips']:
        community = Community.from_ltdb(years=param['years'],
                                        county_fips=param['county_fips'])
    elif 'state_fips' in param and param['state_fips']:
        community = Community.from_ltdb(years=param['years'],
                                        state_fips=param['state_fips'])

    # if the user supplies a CSV and a shapefile, use those instead:
    # attributes come from the CSV and geometry from the shapefile
    if community is None and 'inputCSV' in param:
        community = Community()
        community.gdf = param["inputCSV"]
        geoid = community.gdf.columns[0]
        community.gdf[geoid] = community.gdf[geoid].astype(str)

        # read the shapefile into df_shape, indexed by its geo key
        df_shape = param['shapefile']
        df_shape = df_shape.astype(str)
        geokey = df_shape.columns[0]
        df_shape = df_shape.set_index(geokey)

        # insert geometry into community.gdf, matching tracts by id
        geometry = []
        for index, row in community.gdf.iterrows():
            tractid = row[geoid]
            try:
                tract = df_shape.loc[tractid]
                geometry.append(shapely.wkt.loads(tract.geometry))
            except KeyError:
                # tract id not found in the shapefile
                geometry.append(None)
        if "geometry" not in community.gdf:
            community.gdf.insert(len(community.gdf.columns), "geometry", geometry)

    community.gdf = community.gdf.replace([np.inf, -np.inf], np.nan)
    # drop rows with null geometry for spatial clustering
    community.gdf = community.gdf[pd.notnull(community.gdf['geometry'])]

    # map each selected variable to its display label via the codebook
    codebook = pd.read_csv('template/conversion_table_codebook.csv')
    codebook.set_index(keys='variable', inplace=True)
    labels = copy.deepcopy(param['variables'])
    label = 'short_name'  # default
    if param['label'] in ('variable', 'full_name', 'short_name'):
        label = param['label']
    if label != 'variable':
        for idx, variable in enumerate(param['variables']):
            try:
                codeRec = codebook.loc[variable]
                labels[idx] = codeRec[label]
            except KeyError:
                print("variable not found in codebook. variable:", variable)
    param['labels'] = labels

    write_INDEX_html(param)
    write_CONFIG_js(param)
    write_VARIABLES_js(community, param)
    write_GEO_JSON_js(community, param)

    # create the URL for the visualization
    servers = list(notebookapp.list_running_servers())
    servers1 = ('https://cybergisx.cigi.illinois.edu' +
                servers[0]["base_url"] + 'view')
    servers2 = ('https://cybergisx.cigi.illinois.edu' +
                servers[0]["base_url"] + 'edit')

    cwd = os.getcwd()
    prefix_cwd = "/home/jovyan/work"
    cwd = cwd.replace(prefix_cwd, "")

    # This is for a Jupyter notebook installed on your PC
    local_dir1 = cwd
    local_dir2 = cwd

    # This is for CyberGISX. Uncomment the two lines below when you run in the
    # CyberGISX environment
    #local_dir1 = servers1 + cwd
    #local_dir2 = servers2 + cwd

    fname = urllib.parse.quote('index.html')
    template_dir = os.path.join(local_dir1, 'ACM_' + param['filename_suffix'])
    url = os.path.join(template_dir, fname)
    webbrowser.open(url)
    print('To see your visualization, click the URL below (or locate the files):')
    print(url)
    print('Advanced options are available in ')
    print(local_dir2 + '/' + 'ACM_' + param['filename_suffix'] +
          '/data/CONFIG_' + param['filename_suffix'] + '.js')
def test_Community_from_lodes():
    dc = Community.from_lodes(state_fips="11", years=[2010, 2015])
    assert dc.gdf.shape == (6046, 57)
def test_Community_from_lodes():
    de = Community.from_lodes(state_fips="10", years=[2008, 2015])
    assert de.gdf.shape == (8674, 57)
import numpy as np

from geosnap import Community, io

reno = Community.from_census(msa_fips="39900")
columns = ["median_household_income", "p_poverty_rate", "p_unemployment_rate"]

# Aspatial Clusters


def test_gm():
    r = reno.cluster(columns=columns, method="gaussian_mixture", best_model=True)
    assert len(r.gdf.gaussian_mixture.unique()) >= 5


def test_ward():
    r = reno.cluster(columns=columns, method="ward")
    assert len(r.gdf.ward.unique()) == 7


def test_spectral():
    r = reno.cluster(columns=columns, method="spectral")
    assert len(r.gdf.spectral.unique()) == 7


def test_kmeans():
    r = reno.cluster(columns=columns, method="kmeans")
def test_Community_from_lodes():
    de = Community.from_lodes(state_fips="10", years=[2008, 2015])
    assert de.gdf.shape == (41598, 58)
#%%
from geosnap import Community

#%%
fip = '48'  # Texas
out1 = '/tmp/TX_2000.wkt'
out2 = '/tmp/TX_2010.wkt'

#%%
data = Community.from_census(state_fips=fip)
dataset = data.gdf[['geometry', 'geoid', 'year']]

#%%
D_2000 = dataset[dataset.year == 2000]
D_2000.to_csv(out1, sep='\t', index=False, header=False)

#%%
D_2010 = dataset[dataset.year == 2010]
D_2010.to_csv(out2, sep='\t', index=False, header=False)
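#%%
# A small read-back sketch, assuming the tab-separated layout written above
# (WKT geometry, then geoid and year, no header); column names are reattached
# by hand.
import pandas as pd
import geopandas as gpd
from shapely import wkt

df = pd.read_csv(out1, sep='\t', header=None,
                 names=['geometry', 'geoid', 'year'], dtype={'geoid': str})
gdf = gpd.GeoDataFrame(df, geometry=df['geometry'].apply(wkt.loads))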