def rankQuantileMoves(inputs, scores, drop, subset=None, verbose=True):
    da = dropAny(inputs=inputs, scores=scores, subset=subset, drop=drop)
    if verbose:
        print(ps.Quantiles(da.orig_rank))  # quantile breaks key
        print('\n')
    r0 = ps.Quantiles(da.orig_rank).yb
    r1 = ps.Quantiles(da.drop1p_rank).yb
    moves_raw = (pd.DataFrame({'r0': r0, 'r1': r1})
                 .groupby(['r0', 'r1']).size().unstack(fill_value=0))
    # Row-normalize: each row gives the share of observations moving from
    # quantile class r0 to quantile class r1.
    return np.round(moves_raw.apply(lambda x: x / sum(x), axis=1), 2)
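# A minimal, self-contained sketch of the cross-tabulation idea behind
# rankQuantileMoves() above, on synthetic data (so it does not depend on the
# external dropAny() helper): classify before/after scores into quantiles,
# cross-tabulate the class pairs, and row-normalize.
import numpy as np
import pandas as pd
import pysal as ps

rng = np.random.RandomState(0)
rank0 = rng.rand(100)                      # original ranks
rank1 = rank0 + rng.normal(0, 0.05, 100)   # ranks after a perturbation

r0 = ps.Quantiles(rank0, k=5).yb           # quantile class before
r1 = ps.Quantiles(rank1, k=5).yb           # quantile class after
moves = (pd.DataFrame({'r0': r0, 'r1': r1})
         .groupby(['r0', 'r1']).size().unstack(fill_value=0))
print(np.round(moves.div(moves.sum(axis=1), axis=0), 2))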
def __init__(self, y, w, k=4, permutations=0, fixed=False):
    self.y = y
    rows, cols = y.shape
    self.cols = cols
    npa = np.array
    self.fixed = fixed
    if fixed:
        # Pool all periods and classify once against global quantile breaks.
        yf = y.flatten()
        yb = pysal.Quantiles(yf, k=k).yb
        yb.shape = (rows, cols)
        classes = yb
    else:
        # Classify each period (column) against its own quantile breaks.
        classes = npa([pysal.Quantiles(y[:, i], k=k).yb
                       for i in np.arange(cols)]).transpose()
    classic = Markov(classes)
    self.classes = classes
    self.p = classic.p
    self.s = classic.steady_state
    self.transitions = classic.transitions
    T, P, ss, F = self._calc(y, w, classes, k=k)
    self.T = T
    self.P = P
    self.S = ss
    self.F = F
    self.shtest = self._mn_test()
    self.chi2 = self._chi2_test()
    self.x2 = sum([c[0] for c in self.chi2])
    dof = k * (k - 1) * (k - 1)
    self.x2_pvalue = 1 - stats.chi2.cdf(self.x2, dof)
    self.x2_dof = dof
    if permutations:
        nrp = np.random.permutation
        counter = 0
        x2_realizations = np.zeros((permutations, 1))
        for perm in range(permutations):
            T, P, ss, F = self._calc(nrp(y), w, classes, k=k)
            x2 = [chi2(T[i], self.transitions)[0] for i in range(k)]
            x2s = sum(x2)
            x2_realizations[perm] = x2s
            if x2s >= self.x2:
                counter += 1
        self.x2_rpvalue = (counter + 1.0) / (permutations + 1.)
        self.x2_realizations = x2_realizations
def slider():
    fp = r"C:\zoovision\data\states\states2.shp"  # raw string: avoid backslash escapes
    rg1 = gpd.read_file(fp)
    rg1 = rg1.to_crs(epsg=2163)
    fig, ax = plt.subplots(1, figsize=(15, 10))
    hr10 = ps.Quantiles(rg1.POP10_SQMI, k=10)
    # title = "Select parameters and press query to view surveillance summary"
    # ax.set_title(title, y=1.08, fontsize=30)
    ax.set_axis_off()
    rg1.plot(ax=ax)
    # define dimensions of slider bar
    axcolor = 'lightgoldenrodyellow'
    axfreq = plt.axes([0.25, 0.1, 0.65, 0.03], facecolor=axcolor)
    samp = Slider(axfreq, 'Week', 1, 40, valinit=1)
    samp.on_changed(update)  # `update` callback must be defined elsewhere
    if not os.path.isdir('static'):
        os.mkdir('static')
    else:
        # Remove old plot files
        for filename in glob.glob(os.path.join('static', '*.png')):
            os.remove(filename)
    # Use time in filename to make a unique name the browser has not cached
    plotfile = os.path.join('static', str(time.time()) + '.png')
    plt.savefig(plotfile)
    plt.show()
    return plotfile
def setUp(self):
    self.data = geopandas.read_file(ps.examples.get_path('south.shp'))
    self.test_attribute = 'HR70'
    self.k = 10
    self.breaks = ps.Quantiles(self.data[self.test_attribute].values,
                               k=self.k).bins
    self.pal = mplpal.Inferno_10
    self.cmap = mplpal.Inferno_10.get_mpl_colormap()
def quantile_map(coords, y, k, title='Quantile'):
    """
    Quantile choropleth map

    Arguments
    =========
    coords: Map_Projection instance
    y: array
        variable to map
    k: int
        number of classes
    title: string
        map title
    """
    classification = ps.Quantiles(y, k)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    patches = []
    colors = []
    i = 0
    shape_colors = y  # color by raw value; classification.bins[classification.yb] also works
    for shp in coords.projected:
        for ring in shp:
            x, y = ring  # NB: shadows the y argument; shape_colors was captured above
            x = x / coords.bounding_box[2]
            y = y / coords.bounding_box[3]
            n = len(x)
            x.shape = (n, 1)
            y.shape = (n, 1)
            xy = np.hstack((x, y))
            polygon = Polygon(xy, True)
            patches.append(polygon)
            colors.append(shape_colors[i])
            i += 1
    cmap = cm.get_cmap('hot_r', k + 1)
    boundaries = classification.bins.tolist()
    boundaries.insert(0, 0)
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    p = PatchCollection(patches, cmap=cmap, alpha=0.4, norm=norm)
    colors = np.array(colors)
    p.set_array(colors)
    ax.add_collection(p)
    ax.set_frame_on(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.axes.get_xaxis().set_visible(False)
    ax.set_title(title)
    plt.colorbar(p, cmap=cmap, norm=norm, boundaries=boundaries,
                 ticks=boundaries)
    plt.show()
    return classification
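# Sketch of the color-binning step quantile_map() relies on, without the
# projection/patch machinery: quantile breaks feed a matplotlib BoundaryNorm,
# which maps each value to a discrete color bin (synthetic data; assumes only
# numpy, pysal, and matplotlib).
import numpy as np
import pysal as ps
import matplotlib.cm as cm
import matplotlib.colors as clrs

y = np.random.RandomState(1).lognormal(size=200)
k = 5
classification = ps.Quantiles(y, k)
boundaries = [y.min()] + classification.bins.tolist()
cmap = cm.get_cmap('hot_r', k + 1)
norm = clrs.BoundaryNorm(boundaries, cmap.N)
print(norm(y[:10]))  # discrete bin index (hence color) for the first ten values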
def test___init__(self):
    import numpy as np
    f = pysal.open(pysal.examples.get_path('usjoin.csv'))
    pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
    q5 = np.array([pysal.Quantiles(y).yb for y in pci]).transpose()
    m = pysal.Markov(q5)
    np.testing.assert_array_almost_equal(markov.shorrock(m.p),
                                         0.19758992000997844)
def test___init__(self):
    import numpy as np
    f = pysal.open(pysal.examples.get_path('usjoin.csv'))
    pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
    q5 = np.array([pysal.Quantiles(y).yb for y in pci]).transpose()
    m = pysal.Markov(q5)
    res = np.matrix(
        [[0.08988764, 0.21468144, 0.21125, 0.20194986, 0.07259074]])
    np.testing.assert_array_almost_equal(markov.prais(m.p), res)
def _calc(self, y, w, classes, k):
    # lag markov
    ly = pysal.lag_spatial(w, y)
    npa = np.array
    if self.fixed:
        l_classes = pysal.Quantiles(ly.flatten(), k=k).yb
        l_classes.shape = ly.shape
    else:
        l_classes = npa([pysal.Quantiles(ly[:, i], k=k).yb
                         for i in np.arange(self.cols)])
        l_classes = l_classes.transpose()
    l_classic = Markov(l_classes)
    T = np.zeros((k, k, k))
    n, t = y.shape
    for t1 in range(t - 1):
        t2 = t1 + 1
        for i in range(n):
            T[l_classes[i, t1], classes[i, t1], classes[i, t2]] += 1
    P = np.zeros_like(T)
    F = np.zeros_like(T)  # fmpt
    ss = np.zeros_like(T[0])
    for i, mat in enumerate(T):
        row_sum = mat.sum(axis=1)
        row_sum = row_sum + (row_sum == 0)  # guard against division by zero
        p_i = np.matrix(np.diag(1. / row_sum) * np.matrix(mat))
        ss[i] = steady_state(p_i).transpose()
        try:
            F[i] = fmpt(p_i)
        except Exception:
            print("Singular fmpt matrix for class ", i)
        P[i] = p_i
    return T, P, ss, F
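# Small sketch of the ergodic calls used inside _calc() above, on a
# hand-built 3-state transition matrix (assumes classic pysal's ergodic
# module, which is where steady_state and fmpt live):
import numpy as np
import pysal as ps

p = np.matrix([[0.8, 0.2, 0.0],
               [0.1, 0.8, 0.1],
               [0.0, 0.2, 0.8]])
print(ps.ergodic.steady_state(p))  # long-run class distribution
print(ps.ergodic.fmpt(p))          # first mean passage times between classes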
def simplify_co_occurrence(co, nn=3):
    # quintile breaks (crude, still need to account for upper/diagonals)
    q = ps.Quantiles(co).yb
    q = np.reshape(q, co.shape)
    # nearest neighbors graph
    knn = neighbors.NearestNeighbors(n_neighbors=nn)
    neigh = knn.fit(co)
    knn_mat = neigh.kneighbors_graph(co).toarray()
    out = np.multiply(q, knn_mat)
    return out
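# Hypothetical usage of simplify_co_occurrence() on a small random symmetric
# matrix (assumes the same imports the function uses: numpy as np,
# pysal as ps, and sklearn.neighbors as neighbors):
import numpy as np

rng = np.random.RandomState(2)
co = rng.rand(8, 8)
co = (co + co.T) / 2  # symmetrize, like a co-occurrence matrix
print(simplify_co_occurrence(co, nn=3))  # quantile classes kept only on kNN edges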
def markov(observations, w=None, numQuints=5, method="regular"):
    result = None
    s = None  # steady state; stays None while the lines below are commented out
    if method == "regular":
        # non spatial analysis
        quintiles = np.array([pysal.Quantiles(y, k=numQuints).yb
                              for y in observations]).transpose()
        result = pysal.Markov(quintiles)
        # s = result.steady_state
    else:
        observations = observations.transpose()
        if method == "spatial":
            # standardize observations for smoother calculations:
            observations = observations / (observations.mean(axis=0))
            result = pysal.Spatial_Markov(observations, w, fixed=True,
                                          k=numQuints)
            # s = result.S
        else:  # method == lisa
            result = pysal.LISA_Markov(observations, w)
            # s = result.steady_state
    return result.transitions, result.p, s, pysal.ergodic.fmpt(result.p)
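# Hedged example of the markov() wrapper above in "regular" mode, on
# synthetic data: rows are time periods, columns are units. Note that `s`
# comes back as None because the steady-state lines are commented out.
import numpy as np
import pysal

rng = np.random.RandomState(3)
observations = rng.lognormal(size=(20, 30))  # 20 periods x 30 units
transitions, p, s, passage = markov(observations, numQuints=5)
print(transitions.shape, s)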
def test___init__(self):
    # markov = Markov(class_ids, classes)
    import pysal
    f = pysal.open(pysal.examples.get_path('usjoin.csv'))
    pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
    q5 = np.array([pysal.Quantiles(y).yb for y in pci]).transpose()
    m = pysal.Markov(q5)
    expected = np.array([[729., 71., 1., 0., 0.],
                         [72., 567., 80., 3., 0.],
                         [0., 81., 631., 86., 2.],
                         [0., 3., 86., 573., 56.],
                         [0., 0., 1., 57., 741.]])
    np.testing.assert_array_equal(m.transitions, expected)
    expected = np.matrix(
        [[0.91011236, 0.0886392, 0.00124844, 0., 0.],
         [0.09972299, 0.78531856, 0.11080332, 0.00415512, 0.],
         [0., 0.10125, 0.78875, 0.1075, 0.0025],
         [0., 0.00417827, 0.11977716, 0.79805014, 0.07799443],
         [0., 0., 0.00125156, 0.07133917, 0.92740926]])
    np.testing.assert_array_almost_equal(m.p.getA(), expected.getA())
    expected = np.matrix([[0.20774716], [0.18725774], [0.20740537],
                          [0.18821787], [0.20937187]]).getA()
    np.testing.assert_array_almost_equal(m.steady_state.getA(), expected)
def base_choropleth_classif(map_obj, values, classification='quantiles',
                            k=5, cmap='hot_r', sample_fisher=True):
    '''
    Set coloring based on different classification methods
    ...

    Arguments
    ---------
    map_obj         : Poly/Line collection
                      Output from map_X_shp
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'
    k               : int
                      Number of bins to classify values in and assign a
                      color to
    cmap            : str
                      Matplotlib coloring scheme
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if
                      classification != 'fisher_jenks'

    Returns
    -------
    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring
    '''
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()
    if classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()
    if classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]

    map_obj.set_alpha(0.4)

    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)

    boundaries.insert(0, values.min())
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)

    if isinstance(map_obj, mpl.collections.PolyCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
        map_obj.set_edgecolor('k')
    elif isinstance(map_obj, mpl.collections.LineCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
    elif isinstance(map_obj, mpl.collections.PathCollection):
        if not hasattr(map_obj, 'shp2dbf_row'):
            map_obj.shp2dbf_row = np.arange(values.shape[0])
        map_obj.set_array(values)
    return map_obj
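# Minimal sketch of the classification dispatch used by
# base_choropleth_classif(), on synthetic data (assumes the classic,
# pre-2.0 pysal namespace used throughout this file):
import numpy as np
import pysal as ps

values = np.random.RandomState(4).gamma(2.0, size=150)
k = 5
for name, cls in [('quantiles', ps.Quantiles),
                  ('equal_interval', ps.Equal_Interval),
                  ('fisher_jenks', ps.Fisher_Jenks)]:
    print(name, cls(values, k).bins)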
def base_choropleth_classif(shp_link, values, classification='quantiles',
                            k=5, cmap='hot_r', projection='merc',
                            sample_fisher=True):
    '''
    Create a map object with coloring based on different classification
    methods, from a shapefile in lon/lat CRS
    ...

    Arguments
    ---------
    shp_link        : str
                      Path to shapefile
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'
    k               : int
                      Number of bins to classify values in and assign a
                      color to
    cmap            : str
                      Matplotlib coloring scheme
    projection      : str
                      Basemap projection. See [1]_ for a list. Defaults
                      to 'merc'
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if
                      classification != 'fisher_jenks'

    Returns
    -------
    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring

    Links
    -----
    .. [1] <http://matplotlib.org/basemap/api/basemap_api.html#module-mpl_toolkits.basemap>
    '''
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()
    if classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()
    if classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]

    map_obj = map_poly_shp_lonlat(shp_link, projection=projection)
    map_obj.set_alpha(0.4)

    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)

    boundaries.insert(0, 0)
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)

    map_obj.set_array(values)
    return map_obj
def plot_map(gdf, filename, crm, hlt=None, shading="district", figsize=10,
             label="", ring=None, circ=None, point=None, scores=None,
             legend=False):
    gdf["C"] = pd.Series(crm)
    if hlt:
        gdf["H"] = 0
        gdf.loc[hlt, "H"] = 1
    if shading == "density":
        gdf["density"] = gdf["pop"] / gdf["a"]
        gdf.loc[gdf["density"].isnull(), "density"] = 0.
    dis = gdf.dissolve("C", aggfunc='sum')
    dis.reset_index(inplace=True)
    target = dis["pop"].sum() / dis.shape[0]
    dis["frac"] = dis["pop"] / target
    bounds = gdf.total_bounds
    xr = bounds[2] - bounds[0]
    yr = bounds[3] - bounds[1]
    fs = (figsize * np.sqrt(xr / yr), figsize * np.sqrt(yr / xr))
    bins = min(5, dis.shape[0])
    q = ps.Quantiles(dis["frac"], k=bins)
    if scores:
        dis["scores"] = pd.Series(scores)
    if "target" in shading:
        col, alpha, trunc = "coolwarm", 0.7, ""
        if dis["frac"].max() > 2:
            norm = Normalize(vmin=0, vmax=2)
            trunc = " (Truncated)"
        elif dis["frac"].max() - 1 < 0.005:
            norm = Normalize(vmin=0.995, vmax=1.005)
        else:  # regardless, keep it centered
            larger = max(1 - dis["frac"].min(), dis["frac"].max() - 1)
            norm = Normalize(vmin=1 - larger, vmax=1 + larger)
        cmap = plt.cm.ScalarMappable(norm=norm, cmap=col)
        ax = dis.plot(color="white", edgecolor="white", figsize=fs)
        for xi, row in dis.iterrows():
            dis[dis.index == xi].plot(ax=ax, alpha=alpha, linewidth=1,
                                      edgecolor="black",
                                      facecolor=cmap.to_rgba(row["frac"]))
        fig = ax.get_figure()
        cax = fig.add_axes([0.16, 0.13, 0.70, 0.015 * np.sqrt(xr / yr)])
        sm = plt.cm.ScalarMappable(cmap=col, norm=norm)
        sm._A = []  # gross
        cb = fig.colorbar(sm, cax=cax, alpha=alpha,
                          orientation='horizontal', drawedges=True)
        cb.locator = ticker.MaxNLocator(nbins=5)
        cb.formatter.set_useOffset(False)
        cb.set_label("Population / Target" + trunc, size=12)
        cb.ax.tick_params(labelsize=12)
        cb.dividers.set_visible(False)
        cb.update_ticks()
        # if hlt: gdf[gdf["H"] == 1].plot(facecolor="red", alpha=0.1,
        #                                 linewidth=0.05, ax=ax)
    elif "scores" in shading:
        col, alpha, trunc = "cool", 0.7, ""
        norm = Normalize(vmin=min(scores.values()),
                         vmax=max([1, max(scores.values())]))
        cmap = plt.cm.ScalarMappable(norm=norm, cmap=col)
        ax = dis.plot(color="white", edgecolor="white", figsize=fs)
        for xi, row in dis.iterrows():
            dis[dis.index == xi].plot(ax=ax, alpha=alpha,
                                      facecolor=cmap.to_rgba(row["scores"]),
                                      linewidth=1, edgecolor="black")
        fig = ax.get_figure()
        cax = fig.add_axes([0.16, 0.13, 0.70, 0.015 * np.sqrt(xr / yr)])
        sm = plt.cm.ScalarMappable(cmap=col, norm=norm)
        sm._A = []  # gross
        cb = fig.colorbar(sm, cax=cax, alpha=alpha,
                          orientation='horizontal', drawedges=True)
        cb.locator = ticker.MaxNLocator(nbins=5)
        cb.formatter.set_useOffset(False)
        cb.set_label("Score", size=12)
        cb.ax.tick_params(labelsize=12)
        cb.dividers.set_visible(False)
        cb.update_ticks()
        if hlt:
            gdf[gdf["H"] == 1].plot(facecolor="grey", alpha=0.1,
                                    linewidth=0.05, ax=ax)
    elif "counties" in shading:
        counties = gdf.dissolve("county").reset_index()
        ax = dis.plot(column="C", cmap="nipy_spectral", alpha=0.5,
                      edgecolor="black", linewidth=2.5, figsize=fs)
        dis.set_geometry(dis.boundary).plot(edgecolor="black",
                                            linewidth=2.5, ax=ax)
        county_bounds = gpd.GeoDataFrame(geometry=gpd.GeoSeries(
            crs=counties.crs, data=[counties.boundary.unary_union]))
        # county_bounds.plot(edgecolor="black", linewidth=0.4, linestyle="-", ax=ax)
        county_bounds.plot(edgecolor="white", linewidth=0.4,
                           linestyle="-", ax=ax)
    elif "density" in shading:
        ax = gdf.plot(column="density", cmap="gray", scheme="quantiles",
                      k=9, alpha=0.8, figsize=fs, linewidth=0)
        dis.plot(color="blue", alpha=0.3, linewidth=1, ax=ax)
    else:
        ax = dis.plot("C", alpha=0.5, categorical=True,
                      cmap="nipy_spectral", linewidth=1, edgecolor="black",
                      legend=legend, figsize=fs)
        if legend:
            ax.get_legend().set_bbox_to_anchor((1, 1))
        if hlt:
            gdf[gdf["H"] == 1].plot(facecolor="grey", alpha=0.1,
                                    linewidth=0.05, ax=ax)
    ax.set_xlim([bounds[0] - 0.1 * xr, bounds[2] + 0.1 * xr])
    ax.set_ylim([bounds[1] - 0.1 * yr, bounds[3] + 0.1 * yr])
    if label:
        ax.text(bounds[0] - 0.16 * xr, bounds[3] + 0.12 * yr, label,
                fontsize=10)
    ax.set_axis_off()
    if ring is not None:
        ring["C"] = ring.index
        if shading == "district":
            ring.plot("C", categorical=True, cmap="nipy_spectral", ax=ax,
                      linewidth=3)
            ring.plot(color="white", ax=ax, linewidth=1)
        else:
            ring.plot(color="black", ax=ax, linewidth=2.5)
            ring.plot(color="white", ax=ax, linewidth=0.7)
    if circ is not None:
        circ.plot(color="white", alpha=0.2, ax=ax, linewidth=0.4)
    if point is not None:
        if "district" in shading:
            point["C"] = point.index
            point.plot("C", categorical=True, cmap="nipy_spectral", ax=ax,
                       markersize=3)
        else:
            point.plot(color="black", ax=ax, markersize=3)
            point.plot(color="white", ax=ax, markersize=1)
    if not filename:
        return ax
    ax.figure.savefig(filename, bbox_inches='tight', pad_inches=0.05)
    plt.close('all')
c = np.array([['b', 'a', 'c'],
              ['c', 'c', 'a'],
              ['c', 'b', 'c'],
              ['a', 'a', 'b'],
              ['a', 'b', 'c']])
print(c)
m = pysal.Markov(c)
print(m.classes)
print(m.transitions)

# ############################################################### Classic Markov
f = pysal.open(
    r'C:\Anaconda\Lib\site-packages\pysal\examples\us_income\usjoin.csv')
pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
print(pci.shape)
print(pci)
q5 = np.array([pysal.Quantiles(y).yb for y in pci]).transpose()
print(q5.shape)
print(q5[:, 0])
print(q5[4, :])
m5 = pysal.Markov(q5)
print(m5.transitions)
print(m5.p)
print(m5.steady_state)
print(pysal.ergodic.fmpt(m5.p))

# ############################################################### Spatial Markov
fpci = pci.transpose() / (pci.transpose().mean(axis=0))
print(fpci)
def main():
    shp_link = "data/LSOA_2011_London_gen_MHW.shp"
    # Read in LSOA shapefile
    try:
        shp = gpd.read_file(shp_link, crs="+init=epsg:4326")
        # Set CRS for later mapping
        shp = shp.to_crs(epsg=3857)
    except:
        # Unzip file if not done already
        zip = ZipFile('data.zip')
        zip.extractall()
        shp = gpd.read_file(shp_link, crs="+init=epsg:4326")
        # Set CRS for later mapping
        shp = shp.to_crs(epsg=3857)

    # Read PTAL table
    PTAL_AV = pd.read_csv("data/AVPTAL2015_LSOA2011.csv")
    # Rename column for merge
    PTAL_AV = PTAL_AV.rename(columns={'LSOA2011': 'LSOA11CD'})
    PTAL = shp.merge(PTAL_AV, on='LSOA11CD')
    # Rename average PTAL col to easier name
    PTAL = PTAL.rename(columns={'AvPTAI2015': 'AV_PTAL'})

    # Read in bokeh modules
    from bokeh.plotting import figure, show, ColumnDataSource
    from bokeh.models import HoverTool
    from bokeh.io import output_file
    from bokeh.tile_providers import CARTODBPOSITRON_RETINA

    # Convert geometries from gpd to bokeh format
    lons, lats = gpd_bokeh(PTAL)

    # Create PTAL Rate quantile bins in pysal
    bins_q5 = ps.Quantiles(PTAL.AV_PTAL, k=5)
    # Create colour classes from bins (matplotlib Reds palette)...
    bwr = plt.cm.get_cmap('Reds')
    c5 = [bwr(c) for c in [0.2, 0.4, 0.6, 0.7, 1.0]]
    classes = bins_q5.yb
    colors = [c5[i] for i in classes]
    # ...overridden here by a ColorBrewer purple-to-pink ramp
    colors5 = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77"]
    colors = [colors5[i] for i in classes]

    p = figure(title="London PTAI 2015 Quintiles", toolbar_location='left',
               plot_width=1100, plot_height=700)
    p.patches(lons, lats, fill_alpha=0.7, fill_color=colors,
              line_color="#884444", line_width=2, line_alpha=0.3)

    # Now for an interactive plot
    source = ColumnDataSource(data=dict(
        x=lons,
        y=lats,
        color=colors,
        name=PTAL.LSOA11NM,
        rate=PTAL.AV_PTAL,
    ))
    # Add bokeh tools for interactive mapping
    TOOLS = "pan, wheel_zoom, box_zoom, reset, hover, save"
    p = figure(title="London Average PTAL Index ", tools=TOOLS,
               plot_width=900, plot_height=900)
    p.patches('x', 'y', source=source,
              fill_color='color', fill_alpha=0.7,
              line_color='white', line_width=0.5)
    # Define what info shows with mouse hover
    hover = p.select_one(HoverTool)
    hover.point_policy = 'follow_mouse'
    hover.tooltips = [
        ("Name", "@name"),
        ("PTAL rank", "@rate"),
    ]
    # Turn off axis
    p.axis.visible = False
    # Add basemap
    p.add_tile(CARTODBPOSITRON_RETINA)
    # Output file to html
    output_file("London_PTAL2015.html", title="Average_PTAL_2015")
    show(p)
def setUp(self):
    self.data = ps.pdio.read_files(ps.examples.get_path('south.shp'))
    self.test_attribute = 'HR70'
    self.k = 10
    self.breaks = ps.Quantiles(self.data[self.test_attribute].values,
                               k=self.k).bins
    self.pal = mplpal.Inferno_10
def spatial_trend(self, subquery, time_cols, num_classes=7,
                  w_type='knn', num_ngbrs=5, permutations=0,
                  geom_col='the_geom', id_col='cartodb_id'):
    """
    Predict the trends of a unit based on:
      1. history of its transitions to different classes (e.g., 1st
         quantile -> 2nd quantile)
      2. average class of its neighbors

    Inputs:
    @param subquery string: e.g., SELECT the_geom, cartodb_id,
      interesting_time_column FROM table_name
    @param time_cols list of strings: list of strings of column names
    @param num_classes (optional): number of classes to break
      distribution of values into. Currently uses quantile bins.
    @param w_type string (optional): weight type ('knn' or 'queen')
    @param num_ngbrs int (optional): number of neighbors (if knn type)
    @param permutations int (optional): number of permutations for test
      stats
    @param geom_col string (optional): name of column which contains the
      geometries
    @param id_col string (optional): name of column which has the ids of
      the table

    Outputs:
    @param trend_up float: probability that a geom will move to a higher
      class
    @param trend_down float: probability that a geom will move to a lower
      class
    @param trend float: (trend_up - trend_down) / trend_static
    @param volatility float: a measure of the volatility based on
      probability stddev(prob array)
    """
    if len(time_cols) < 2:
        plpy.error('More than one time column needs to be passed')

    params = {"id_col": id_col,
              "time_cols": time_cols,
              "geom_col": geom_col,
              "subquery": subquery,
              "num_ngbrs": num_ngbrs}

    result = self.data_provider.get_markov(w_type, params)

    # build weight
    weights = pu.get_weight(result, w_type)
    weights.transform = 'r'

    # prep time data
    t_data = get_time_data(result, time_cols)

    sp_markov_result = ps.Spatial_Markov(t_data, weights,
                                         k=num_classes, fixed=False,
                                         permutations=permutations)

    # get lag classes
    lag_classes = ps.Quantiles(
        ps.lag_spatial(weights, t_data[:, -1]),
        k=num_classes).yb

    # look up probability distribution for each unit according to class
    # and lag class
    prob_dist = get_prob_dist(sp_markov_result.P, lag_classes,
                              sp_markov_result.classes[:, -1])

    # find the ups and downs and overall distribution of each cell
    trend_up, trend_down, trend, volatility = get_prob_stats(
        prob_dist, sp_markov_result.classes[:, -1])

    # output the results
    return zip(trend, trend_up, trend_down, volatility, weights.id_order)
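# Sketch of the lag-class computation at the heart of spatial_trend():
# spatially lag the latest period, then classify the lags into quantile
# bins (synthetic lattice data; classic pysal namespace assumed).
import numpy as np
import pysal as ps

w = ps.lat2W(5, 5)  # 5x5 rook lattice
w.transform = 'r'   # row-standardize, as in the function above
y = np.random.RandomState(5).rand(25)
lag_classes = ps.Quantiles(ps.lag_spatial(w, y), k=4).yb
print(lag_classes)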
def choropleth_map(jsonpath, key, attribute, df=None,
                   classification="Quantiles", classes=5, bins=None,
                   std=None, centroid=None, zoom_start=5,
                   tiles='OpenStreetMap', fill_color="YlGn",
                   fill_opacity=.5, line_opacity=0.2, legend_name='',
                   save=True):
    '''
    One-shot mapping function for folium-based choropleth mapping.

    jsonpath  - the filepath to a JSON file
    key       - the field upon which the JSON and the dataframe will be
                linked
    attribute - the attribute to be mapped

    The rest of the arguments are keyword:

    classification - type of classification scheme to be used
    classes        - number of classes used
    bins           - breakpoints, if manual classes are desired
    '''

    # Polymorphism by hand...
    if isinstance(jsonpath, str):
        if os.path.isfile(jsonpath):
            sjson = gj.load(open(jsonpath))
        else:
            raise IOError('File not found')

    if isinstance(jsonpath, dict):
        raise NotImplementedError(
            'Direct mapping from dictionary not yet supported')
        # with open('tmp.json', 'w') as out:
        #     gj.dump(jsonpath, out)
        # sjson = gj.load(open('tmp.json'))

    if isinstance(jsonpath, tuple):
        if 'ShpWrapper' in str(type(jsonpath[0])) and 'DBF' in str(
                type(jsonpath[1])):
            flip('tmp.json', jsonpath[0], jsonpath[1])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        elif 'ShpWrapper' in str(type(jsonpath[1])) and 'DBF' in str(
                type(jsonpath[0])):
            flip('tmp.json', jsonpath[1], jsonpath[0])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        else:
            raise IOError('Inputs must be GeoJSON filepath, GeoJSON '
                          'dictionary in memory, or shp-dbf tuple')

    # key construction
    if df is None:
        df = json2df(sjson)
    dfkey = [key, attribute]

    # centroid search
    if centroid is None:
        if 'bbox' in sjson.keys():
            bbox = sjson.bbox
        bbox = bboxsearch(sjson)  # NB: overrides the bbox read above
        xs = sum([bbox[0], bbox[2]]) / 2.
        ys = sum([bbox[1], bbox[3]]) / 2.
        centroid = [ys, xs]
    jsonkey = 'feature.properties.' + key

    choromap = fm.Map(location=centroid, zoom_start=zoom_start,
                      tiles=tiles)  # all the elements you need to make a choropleth

    # standardization
    if std is not None:
        if isinstance(std, int) or isinstance(std, float):
            y = np.array(df[attribute] / std)
        elif type(std) == str:
            y = np.array(df[attribute] / df[std])
        elif callable(std):
            raise NotImplementedError(
                'Functional Standardizations are not implemented yet')
        else:
            raise ValueError('Standardization must be integer, float, '
                             'function, or Series')
    else:
        y = np.array(df[attribute].tolist())

    # For people who don't read documentation...
    if isinstance(classes, list):
        bins = classes
        classes = len(bins)
    elif isinstance(classes, float):
        try:
            classes = int(classes)
        except:
            raise ValueError('Classes must be coercible to integers')

    # classification passing
    if classification is not None:
        if classification == "Maximum Breaks":
            # there is probably a better way to do this, but it's a start.
            mapclass = ps.Maximum_Breaks(y, k=classes).bins.tolist()
        elif classification == 'Quantiles':
            mapclass = ps.Quantiles(y, k=classes).bins.tolist()
        elif classification == 'Fisher-Jenks':
            mapclass = ps.Fisher_Jenks(y, k=classes).bins
        elif classification == 'Equal Interval':
            mapclass = ps.Equal_Interval(y, k=classes).bins.tolist()
        elif classification == 'Natural Breaks':
            mapclass = ps.Natural_Breaks(y, k=classes).bins
        elif classification == 'Jenks Caspall Forced':
            raise NotImplementedError(
                'Jenks Caspall Forced is not implemented yet.')
            # mapclass = ps.Jenks_Caspall_Forced(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall Sampled':
            raise NotImplementedError(
                'Jenks Caspall Sampled is not implemented yet')
            # mapclass = ps.Jenks_Caspall_Sampled(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall':
            mapclass = ps.Jenks_Caspall(y, k=classes).bins.tolist()
        elif classification == 'User Defined':
            mapclass = bins
        elif classification == 'Standard Deviation':
            if bins is None:
                l = classes // 2
                bins = range(-l, l + 1)
            mapclass = list(ps.Std_Mean(y, bins).bins)
        elif classification == 'Percentiles':
            if bins is None:
                bins = [1, 10, 50, 90, 99, 100]
            mapclass = list(ps.Percentiles(y, bins).bins)
        elif classification == 'Max P':
            mapclass = ps.Max_P_Classifier(y, k=classes).bins.tolist()
        else:
            raise NotImplementedError(
                'Your classification is not supported or was not found. '
                'Supported classifications are:\n "Maximum Breaks"\n '
                '"Quantiles"\n "Fisher-Jenks"\n "Equal Interval"\n '
                '"Natural Breaks"\n "Jenks Caspall"\n "User Defined"\n '
                '"Percentiles"\n "Max P"')
    else:
        print('Classification forced to None. Defaulting to Quartiles')
        mapclass = ps.Quantiles(y, k=classes).bins.tolist()

    # folium call, try abstracting to a "mapper" function, passing a list of args
    choromap.geo_json(geo_path=jsonpath, key_on=jsonkey,
                      data=df, columns=dfkey,
                      fill_color=fill_color, fill_opacity=fill_opacity,
                      line_opacity=line_opacity,
                      threshold_scale=mapclass[:-1],
                      legend_name=legend_name)

    if save:
        fname = jsonpath.rstrip('.json') + '_' + attribute + '.html'
        choromap.save(fname)
    return choromap
def createClassifyMap(self, map_type):
    """ return an instance of pysal.Map_Classifier """
    id_group = []
    color_group = []
    label_group = []

    if map_type == stars.MAP_CLASSIFY_EQUAL_INTERVAL:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Equal_Interval(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_PERCENTILES:
        pct = [1, 10, 50, 90, 99, 100]  # doesn't support user-defined pct
        cm = pysal.Percentiles(self.data, pct=pct)
        counts = list(cm.counts)
        n_counts = len(counts)
        if n_counts < 6:
            for i in range(6 - n_counts):
                counts.append(0)
        label_group = ['<1%%(%d)' % counts[0],
                       '1%% - 10%%(%d)' % counts[1],
                       '10%% - 50%%(%d)' % counts[2],
                       '50%% - 90%%(%d)' % counts[3],
                       '90%% - 99%%(%d)' % counts[4],
                       '>99%%(%d)' % counts[5]]
        color_group = self.pick_color_set(3, 6, True)

    elif map_type == stars.MAP_CLASSIFY_BOX_PLOT:
        hinge = 1.5  # default
        if "hinge" in self.params:
            hinge = self.params["hinge"]
        cm = pysal.Box_Plot(self.data, hinge=hinge)
        n_bins = len(cm.bins)
        if n_bins == 5:
            n_upper_outlier = 0
        else:
            n_upper_outlier = cm.counts[5]
        label_group = ['Lower outlier(%d)' % cm.counts[0],
                       '<25%% (%d)' % cm.counts[1],
                       '25%% - 50%% (%d)' % cm.counts[2],
                       '50%% - 75%% (%d)' % cm.counts[3],
                       '>75%% (%d)' % cm.counts[4],
                       'Upper outlier (%d)' % n_upper_outlier]
        color_group = self.pick_color_set(2, 6, False)

    elif map_type == stars.MAP_CLASSIFY_QUANTILES:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Quantiles(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_STD_MEAN:
        cm = pysal.Std_Mean(self.data, multiples=[-2, -1, 0, 1, 2])
        n_bins = len(cm.bins)

    elif map_type == stars.MAP_CLASSIFY_MAXIMUM_BREAK:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Maximum_Breaks(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_NATURAL_BREAK:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Natural_Breaks(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_FISHER_JENKS:
        cm = pysal.Fisher_Jenks(self.data)
        # see below: common label group and color group

    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Jenks_Caspall(self.data, k=k)
        label_group = self._get_label_group_by_k([i[0] for i in cm.bins],
                                                 cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_SAMPLED:
        k = 5  # default
        pct = 0.1
        if "k" in self.params:
            k = self.params["k"]
        if "pct" in self.params:
            pct = self.params["pct"]
        cm = pysal.Jenks_Caspall_Sampled(self.data, k=k, pct=pct)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_FORCED:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Jenks_Caspall_Forced(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_USER_DEFINED:
        assert "bins" in self.params
        bins = self.params["bins"]
        cm = pysal.User_Defined(self.data, bins=bins)
        k = len(bins)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_MAX_P:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Max_P_Classifier(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_UNIQUE_VALUES:
        id_group_dict = {}
        id_other = []
        n = 0
        for i, item in enumerate(self.data):
            if n < 10 and item not in id_group_dict:
                id_group_dict[item] = []
                n += 1
            if item in id_group_dict:
                id_group_dict[item].append(i)
            else:
                id_other.append(i)
        id_group = list(id_group_dict.values())
        unique_values = list(id_group_dict.keys())
        max_num_values = n if n <= 10 else 10
        label_group = [str(unique_values[i]) for i in range(max_num_values)]
        color_group = [stars.MAP_COLOR_12_UNIQUE_FILL[i]
                       for i in range(max_num_values)]
        if n >= 10:
            id_group.append(id_other)
            label_group.append('Others')
            color_group.append(stars.MAP_COLOR_12_UNIQUE_OTHER)
        field_name = self.params['field_name']
        id_group.insert(0, [])
        label_group.insert(0, field_name)
        color_group.insert(0, None)

    else:
        raise KeyError('Classify map type is illegal')

    # for some common label group and color group
    if map_type in [stars.MAP_CLASSIFY_FISHER_JENKS,
                    stars.MAP_CLASSIFY_STD_MEAN]:
        label_group = self._get_range_labels(cm.bins, cm.counts)
        color_group = self.pick_color_set(3, len(cm.bins), True)

    if map_type != stars.MAP_CLASSIFY_UNIQUE_VALUES:
        # convert bin ids to groups of observation ids
        binIds = cm.yb
        bins = cm.bins
        n_group = len(bins)
        id_group = [[] for i in range(n_group)]
        for i, gid in enumerate(binIds):
            id_group[gid].append(i)

    return id_group, label_group, color_group
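# Hedged sketch of a few classifier branches from createClassifyMap() above,
# showing the bins/counts pairs the label helpers consume (classic pysal,
# synthetic data):
import numpy as np
import pysal

data = np.random.RandomState(7).normal(100, 15, size=300)
for cm in (pysal.Percentiles(data, pct=[1, 10, 50, 90, 99, 100]),
           pysal.Box_Plot(data, hinge=1.5),
           pysal.Quantiles(data, k=5)):
    print(type(cm).__name__, cm.bins, cm.counts)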
# Accounts for the autocorrelation in the model with the weight matrix
# data = ps.pdio.read_files(shape)
Queen = ps.queen_from_shapefile(shape)
Queen.transform = 'r'
percent16Lag = ps.lag_spatial(Queen, data.percent16)

# In[4]:

# This is a spatial lag graph of the percentages of suicide for the year 2016.
# Spatial lag is a form of regression that accounts for the weight matrix of
# the shapefile and the dependent variable that you have chosen.
import matplotlib.pyplot as plt

us = file  # GeoDataFrame presumably loaded in an earlier cell
percent16LagQ16 = ps.Quantiles(percent16Lag, k=10)
f, ax = plt.subplots(1, figsize=(150, 150))
us.assign(cl=percent16LagQ16.yb).plot(column='cl', categorical=True, k=10,
                                      cmap='OrRd', linewidth=0.1, ax=ax,
                                      edgecolor='white', legend=True)
ax.set_axis_off()
plt.title("Percentage of Suicides 2016 Spatial Lag Deciles")
plt.show()

# In[17]:
def column_kde(series_to_plot, num_bins=7, split_type="quantiles", bw=0.15,
               plot_title="", xlabel="x", ylabel="y"):
    """
    v1.0

    function that plots:
        Kernel Density Estimation (KDE)
        rugplot
        a classification of the distribution based on 'num_bins' and
        'split_type'

    Plots data from the global variable (GeoDataFrame) 'teranet_da_gdf'

    ----------------
    Input arguments: series_to_plot -- pandas Series -- series to be plotted

                     num_bins       -- int -- number of bins to be used for
                                       the split of the distribution
                                       (default=7)

                     split_type     -- str -- type of the split of the
                                       distribution (default='quantiles');
                                       must be 'quantiles',
                                       'equal_interval', or 'fisher_jenks'

                     bw             -- float -- bandwidth to be used for
                                       KDE (default=0.15)

    --------
    Returns: None, plots a KDE, rugplot, and bins of values in
             'column_to_plot'
    """
    # generate bins from the split of the distribution using the type of
    # split provided in 'split_type'
    if split_type == 'quantiles':
        classi = ps.Quantiles(series_to_plot, k=num_bins)
    elif split_type == 'equal_interval':
        classi = ps.Equal_Interval(series_to_plot, k=num_bins)
    elif split_type == 'fisher_jenks':
        classi = ps.Fisher_Jenks(series_to_plot, k=num_bins)
    elif type(split_type) == str:
        raise ValueError("Input parameter 'split_type' must be either "
                         "'quantiles', 'equal_interval', or 'fisher_jenks'.")
    else:
        raise TypeError("Input parameter 'split_type' must be a string and "
                        "either 'quantiles', 'equal_interval', or "
                        "'fisher_jenks'.")

    # print the bins
    print(classi)

    # create figure and axis
    f, ax = plt.subplots(1, figsize=(9, 6))

    # plot KDE of the distribution
    sns.kdeplot(series_to_plot, shade=True,
                label='Distribution of counts of Teranet records per DA',
                bw=bw)

    # plot a rugplot
    sns.rugplot(series_to_plot, alpha=0.5)

    # plot the split of the distribution
    for classi_bin in classi.bins:
        ax.axvline(classi_bin, color='magenta', linewidth=1, linestyle='--')

    # plot the mean and the median
    ax.axvline(series_to_plot.mean(), color='deeppink', linestyle='--',
               linewidth=1)
    ax.text(series_to_plot.mean(), 0,
            "Mean: {0:.2f}".format(series_to_plot.mean()), rotation=90)
    ax.axvline(series_to_plot.median(), color='coral', linestyle=':')
    ax.text(series_to_plot.median(), 0,
            "Median: {0:.2f}".format(series_to_plot.median()), rotation=90)

    # configure axis parameters
    ax.set_title(plot_title, fontdict={'fontsize': '18', 'fontweight': '3'})
    ax.set_xlabel(xlabel, fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.set_ylabel(ylabel, fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.legend(loc='best')
    plt.show()
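# Hypothetical call to column_kde() above on a synthetic right-skewed series
# (assumes the imports the function relies on are in scope: pysal as ps,
# matplotlib.pyplot as plt, seaborn as sns):
import numpy as np
import pandas as pd

counts = pd.Series(np.random.RandomState(6).lognormal(mean=2.0, size=500))
column_kde(counts, num_bins=5, split_type='quantiles',
           plot_title='Synthetic counts per DA',
           xlabel='count', ylabel='density')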
    ## prep time data
    t_data = get_time_data(query_result, time_cols)

    plpy.debug('shape of t_data %d, %d' % t_data.shape)
    plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape)
    plpy.debug('first num elements: %f' % t_data[0, 0])

    sp_markov_result = ps.Spatial_Markov(t_data, weights,
                                         k=num_classes, fixed=False,
                                         permutations=permutations)

    ## get lag classes
    lag_classes = ps.Quantiles(
        ps.lag_spatial(weights, t_data[:, -1]),
        k=num_classes).yb

    ## look up probability distribution for each unit according to class
    ## and lag class
    prob_dist = get_prob_dist(sp_markov_result.P, lag_classes,
                              sp_markov_result.classes[:, -1])

    ## find the ups and downs and overall distribution of each cell
    trend_up, trend_down, trend, volatility = get_prob_stats(
        prob_dist, sp_markov_result.classes[:, -1])

    ## output the results
    return zip(trend, trend_up, trend_down, volatility, weights.id_order)


def get_time_data(markov_data, time_cols):
    """
# %matplotlib inline
import pysal as ps
import pandas as pd
import numpy as np
from flask import Flask, render_template, request
from pysal.contrib.viz import mapping as maps
import matplotlib.pyplot as plt
import geopandas as gpd
import os, time, glob
import wtforms

data = ps.pdio.read_files(r"C:\zoovision\data\Region1.shp")  # raw string: avoid backslash escapes
data.head()
shp_link = r"C:\zoovision\data\Region1.shp"
tx = gpd.read_file(shp_link)
hr10 = ps.Quantiles(data.ind_100t, k=10)
f, ax = plt.subplots(1, figsize=(9, 9))
tx.assign(cl=hr10.yb).plot(column='cl', categorical=True,
                           k=10, cmap='OrRd', linewidth=0.1, ax=ax,
                           edgecolor='white', legend=True)
ax.set_axis_off()
plt.title("ind_100t Deciles")
plt.show()

if not os.path.isdir('static'):
    os.mkdir('static')
else:
    # Remove old plot files
    for filename in glob.glob(os.path.join('static', '*.png')):
        os.remove(filename)
# Use time since Jan 1, 1970 in filename in order make
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, Button, RadioButtons
# %matplotlib inline
import pysal as ps
from pysal.contrib.viz import mapping as maps
import geopandas as gpd
from pandas import DataFrame
from geopandas import GeoDataFrame
import os, time, glob

fp = r"C:\zoovision\data\states\states2.shp"  # raw string: avoid backslash escapes
rg1 = gpd.read_file(fp)
rg1 = rg1.to_crs(epsg=2163)
fig, ax = plt.subplots(1, figsize=(15, 10))
hr10 = ps.Quantiles(rg1.POP10_SQMI, k=10)
# title = "Select parameters and press query to view surveillance summary"
# ax.set_title(title, y=1.08, fontsize=30)
ax.set_axis_off()
rg1.plot(ax=ax)
# rg1.assign(cl=hr10.yb).plot(column='cl', categorical=True,
#                             linewidth=0.5, ax=ax,
#                             k=10, cmap='BuGn', edgecolor='black')

if not os.path.isdir('static'):
    os.mkdir('static')
else:
    # Remove old plot files
    for filename in glob.glob(os.path.join('static', '*.png')):
        os.remove(filename)