import os

import geopandas as gpd
import matplotlib as mpl
import matplotlib.colors as col
import matplotlib.pyplot as plt
import numpy as np
import pysal as ps
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# Project helpers assumed to be defined elsewhere in this repo:
# stackColumnValues, renameTo24HourSystem, parseHexSeaborn, plotCustomColors


def main():
    # Parameters
    # ----------
    show_legend = False      # True
    show_title_text = False  # True

    # Plot base-grid (with no-data hashes)
    show_noData = False

    # Choose classifier (if None, use the self-specified classification below)
    # Options: 'NaturalBreaks', 'JenksCaspall', 'MaximumBreaks', 'FisherJenks', 'HeadTail'
    mapclassifier = None

    # Filepaths
    data_fp = "data/MFD_Population_24H_Tallinn_500m_grid.shp"
    roads_fp = "data/Tallinn_main_roads_for_visualization.shp"
    boundaries_fp = "data/TLN_bordersDASY.shp"
    water_fp = "data/TLN_water_clip_OSM.shp"
    outdir = "results/population_maps"

    # Read files
    data = gpd.read_file(data_fp)
    roads = gpd.read_file(roads_fp)
    boundaries = gpd.read_file(boundaries_fp)
    water = gpd.read_file(water_fp)

    # Re-project all layers into the same CRS as the grid
    roads['geometry'] = roads['geometry'].to_crs(crs=data.crs)
    roads.crs = data.crs
    boundaries['geometry'] = boundaries['geometry'].to_crs(crs=data.crs)
    boundaries.crs = data.crs
    water['geometry'] = water['geometry'].to_crs(crs=data.crs)
    water.crs = data.crs

    # Take only the three largest waterbodies
    water['area'] = water.area
    water = water.sort_values(by='area', ascending=False)
    water.reset_index(inplace=True)
    water = water.iloc[0:3]  # .ix is deprecated; this keeps rows 0-2 as before

    # Time columns showing the share of population at different hours
    tcols = ["H%s" % num for num in range(0, 24)]

    # Multiply by 100 to get a percentage (0-100) representation
    data[tcols] = data[tcols] * 100

    # Create custom classifier
    # (bins are the upper boundary of the class, including the value itself)
    # ---------------------------------------------------------------------
    # Natural Breaks classification (7 classes), rounded for a more intuitive legend
    my_bins = [0.05, 0.10, 0.20, 0.40, 0.80, 1.6, 3.97]

    # Columns to classify
    ccolumns = tcols

    if mapclassifier:
        # Stack all values
        stacked_values = stackColumnValues(df=data, columns=ccolumns)

        # Classify values with the chosen classifier
        n = 7
        my_bins = [x for x in range(n)]
        if mapclassifier == 'HeadTail':
            classif = ps.esda.mapclassify.HeadTail_Breaks(stacked_values)
        elif mapclassifier == 'FisherJenks':
            classif = ps.Fisher_Jenks(stacked_values, k=n)
        elif mapclassifier == 'NaturalBreaks':
            classif = ps.Natural_Breaks(stacked_values, k=n)
        elif mapclassifier == 'MaximumBreaks':
            classif = ps.Maximum_Breaks(stacked_values, k=n)
        elif mapclassifier == 'JenksCaspall':
            classif = ps.Jenks_Caspall(stacked_values, k=n)

        # Get bins
        my_bins = list(classif.bins)

    # Apply the chosen classification column-by-column
    classifier = ps.User_Defined.make(bins=my_bins)
    classif = data[ccolumns].apply(classifier)

    # Rename classified columns (prefix with the letter 'c')
    classif.columns = list(map(lambda x: "c" + x, classif.columns))

    # Join back to the grid
    data = data.join(classif)

    # Classified columns showing the distribution of the population
    ccols = ["cH%s" % num for num in range(0, 24)]

    # Rename columns, dropping the leading 'H'
    data, new_cols = renameTo24HourSystem(data, tcols, minutes=True)

    # Select color palette
    palette = sns.diverging_palette(220, 20, n=len(my_bins))

    # Get hex colors
    hex_colors = parseHexSeaborn(palette)

    # Change the white color into a more reddish tone
    hex_colors[3] = '#FFF2F2'
    N = len(hex_colors)

    # Convert to RGB
    legendcolors = [col.hex2color(hexcol) for hexcol in hex_colors]

    # Legend labels
    binlabels = np.array(my_bins)
    rbinlabels = binlabels.round(2)
    legend_labels = list(rbinlabels)
    legend_labels.insert(0, 0)

    for tattribute in new_cols:
        # Color balancer
        color_balancer = list(hex_colors)

        # Print the classes
        classcol = "cH%s" % int(tattribute[0:2])
        classes = list(data[classcol].unique())
        classes.sort()
        print("%s \t N-classes: %s \t Classes: " % (tattribute, len(classes)), classes)

        # If not all classes have values, remove the color of each missing
        # class so that the coloring scheme is identical for all times
        if len(classes) < N:
            class_values = [val for val in range(N)]
            # Put values in reverse order
            class_values.reverse()
            # Find out which classes are missing and remove their colors
            for i in class_values:
                if i not in classes:
                    del color_balancer[i]

        # Convert to RGB
        rgbcolors = [col.hex2color(hexcol) for hexcol in color_balancer]

        # Dynamo colormap
        Ncolor = len(color_balancer)
        dynamocmap = LinearSegmentedColormap.from_list("my_colormap", rgbcolors,
                                                       N=Ncolor, gamma=1.0)

        # Initialize figure
        if not show_legend:
            fig, ax = plt.subplots()
        else:
            fig = plt.figure(figsize=(8, 7))
            # Add axes (1 for the image, 2 for the custom legend)
            # [DistFromLeft, DistFromBottom, Width, Height]
            ax = fig.add_axes([0.05, 0.15, 0.8, 0.65])
            ax1 = fig.add_axes([0.2, 0.08, 0.6, 0.035])

        # Column name for the hour information
        name = "h%s" % int(tattribute[0:2])

        # Plot base grid
        if show_noData:
            if show_legend:
                data.plot(ax=ax, color='white', linewidth=0.1, hatch='x',
                          edgecolor='grey', legend=True)
            else:
                data.plot(ax=ax, color='white', linewidth=0.1, hatch='x',
                          edgecolor='grey')
        else:
            if show_legend:
                data.plot(ax=ax, color='white', linewidth=0, edgecolor='grey',
                          legend=True)
            else:
                data.plot(ax=ax, color='white', linewidth=0, edgecolor='grey')

        # Clip grid with the boundaries
        # (note: this runs on every iteration; the first pass already clips the grid)
        data = gpd.overlay(data, boundaries, how='intersection')

        # Plot the map using the custom color map (on the classified column)
        ax = plotCustomColors(ax=ax, df=data, column=classcol,
                              custom_cmap=dynamocmap,
                              linewidth=0.05, edgecolor='grey')

        # Plot water bodies
        water.plot(ax=ax, color='white', alpha=1.0, linewidth=0,
                   edgecolor='grey')  # linewidth=0.05

        # Plot roads
        roads.plot(ax=ax, color='grey', lw=0.8, alpha=0.8)

        # Specify x- and y-limits
        ax.set_xlim(left=531000, right=553000)
        ax.set_ylim(top=6596000, bottom=6579400)

        # Remove tick markers
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

        # Info texts
        info_text = "%s" % tattribute
        if not show_legend:
            ppos_x = 540000
            ppos_y = 6595500
        else:
            ppos_x = 540000
            ppos_y = 6596500

        # Add text about the time
        ax.text(ppos_x, ppos_y, info_text, size=30, color='black',
                **{'fontname': 'Arial'})

        # Add title text
        if show_title_text:
            ax.text(ppos_x - 5000, ppos_y + 2000,
                    "Population distribution in Tallinn\n based on mobile phone data",
                    size=20, color='gray', **{'fontname': 'Arial'})

        # Add legend
        if show_legend:
            ax1.imshow(np.arange(N).reshape(1, N),
                       cmap=mpl.colors.ListedColormap(list(legendcolors)),
                       interpolation="nearest", aspect="auto")

            # Set locations of the bins
            ax1.set_xticks(np.arange(N + 1) - .5)
            ax1.set_yticks([])

            # Specify the labels
            ax1.set_xticklabels(legend_labels)

            # Set colorbar title
            cbar_title = 'Share of population (%)'
            pos_x = 0.25
            pos_y = 0.123
            plt.figtext(pos_x, pos_y, cbar_title, size=12)

        # Save figure
        resolution = 500
        outpath = os.path.join(outdir, "%s_PopulationDistribution_map_%sdpi.png"
                               % (tattribute[0:2], resolution))

        # Don't show axis borders
        ax.axis('off')
        if not show_legend:
            plt.tight_layout()
        plt.savefig(outpath, dpi=resolution)
        # plt.show()
        plt.close()
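
# A minimal sketch of the classification pattern used in main() above, on
# hypothetical data (the DataFrame and its values are made up for illustration).
# It relies only on the legacy PySAL 1.x API that main() itself uses:
# ps.User_Defined.make(bins=...) returns a callable that pandas can apply
# column-by-column, turning raw shares into class indices 0..len(bins)-1.
def _user_defined_classification_demo():
    import pandas as pd
    import pysal as ps
    demo = pd.DataFrame({'H8': [0.02, 0.30, 2.50],
                         'H9': [0.07, 0.90, 4.10]})
    classifier = ps.User_Defined.make(bins=[0.05, 0.10, 0.20, 0.40, 0.80, 1.6, 3.97])
    classified = demo[['H8', 'H9']].apply(classifier)
    print(classified)  # each cell is a class index, ready for colormap lookup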
# Network/collector classification fragment.
# Assumes earlier in this script: a networkx graph G, a dict collectorNodeDic
# mapping collector -> specimen count, a list nodeList, and the imports
# `import numpy as np` and `import pysal`.

# print(collectorNodeDic)

# Degree-based node filter (note: as written, `degree < -1` never matches,
# so `remove` stays empty and no nodes are filtered out)
remove = [node for node, degree in dict(G.degree()).items() if degree < -1]
# remove1 = [degree for node, degree in dict(G.degree()).items()]
# print(remove1)
# sys.exit()

# Gather the per-collector specimen counts into an array
collSpecimenCount_temp = []
for k, v in collectorNodeDic.items():
    collSpecimenCount_temp.append(v)
collSpecimenCount = np.array(collSpecimenCount_temp)

# Classify collectors into four natural-breaks classes by specimen count
collSpecimenBreaks = pysal.Natural_Breaks(collSpecimenCount, k=4)
print(collSpecimenBreaks)

# greater1000Coll = [k for k, r in collectorNodeDic.items() if r > collSpecimenBreaks.bins[2] and k in nodeList and k not in remove]
# greater500Coll = [k for k, r in collectorNodeDic.items() if collSpecimenBreaks.bins[1] < r < collSpecimenBreaks.bins[2] and k in nodeList and k not in remove]
# greater100Coll = [k for k, r in collectorNodeDic.items() if collSpecimenBreaks.bins[0] < r < collSpecimenBreaks.bins[1] and k in nodeList and k not in remove]
# less100Coll = [k for k, r in collectorNodeDic.items() if r < collSpecimenBreaks.bins[0] and k in nodeList and k not in remove]

# elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 5]
# print(elarge)

# Edge weights, collected for classifying edges the same way
collEdgeCount_temp = [d['weight'] for (u, v, d) in G.edges(data=True)]
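
# Self-contained sketch of the binning idea in the commented-out lines above,
# on hypothetical specimen counts (the real script classifies collectorNodeDic
# values). Natural_Breaks exposes .bins (class upper bounds) and .yb (class
# index per observation), which is what the greater*/less* lists rebuild by hand.
def _natural_breaks_demo():
    import numpy as np
    import pysal
    counts = np.array([3, 8, 12, 95, 110, 480, 520, 1500, 2300])  # hypothetical
    breaks = pysal.Natural_Breaks(counts, k=4)
    print(breaks.bins)  # upper bound of each class
    for class_idx in range(4):
        members = counts[breaks.yb == class_idx]
        print("class %d: %s" % (class_idx, members))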
def choropleth_map(jsonpath, key, attribute, df=None,
                   classification="Quantiles", classes=5, bins=None,
                   std=None, centroid=None, zoom_start=5,
                   tiles='OpenStreetMap', fill_color="YlGn",
                   fill_opacity=.5, line_opacity=0.2, legend_name='',
                   save=True):
    '''
    One-shot mapping function for folium-based choropleth mapping.

    jsonpath  - the filepath to a GeoJSON file
    key       - the field upon which the GeoJSON and the dataframe will be linked
    attribute - the attribute to be mapped

    The rest of the arguments are keyword:

    classification - type of classification scheme to be used
    classes        - number of classes used
    bins           - breakpoints, if manual classes are desired
    '''
    # Polymorphism by hand...
    if isinstance(jsonpath, str):
        if os.path.isfile(jsonpath):
            sjson = gj.load(open(jsonpath))
        else:
            raise IOError('File not found')
    if isinstance(jsonpath, dict):
        raise NotImplementedError('Direct mapping from dictionary not yet supported')
        # with open('tmp.json', 'w') as out:
        #     gj.dump(jsonpath, out)
        # sjson = gj.load(open('tmp.json'))
    if isinstance(jsonpath, tuple):
        if 'ShpWrapper' in str(type(jsonpath[0])) and 'DBF' in str(type(jsonpath[1])):
            flip('tmp.json', jsonpath[0], jsonpath[1])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        elif 'ShpWrapper' in str(type(jsonpath[1])) and 'DBF' in str(type(jsonpath[0])):
            flip('tmp.json', jsonpath[1], jsonpath[0])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        else:
            raise IOError('Inputs must be GeoJSON filepath, GeoJSON dictionary '
                          'in memory, or shp-dbf tuple')

    # key construction
    if df is None:
        df = json2df(sjson)
    dfkey = [key, attribute]

    # centroid search
    if centroid is None:
        if 'bbox' in sjson.keys():
            bbox = sjson.bbox
        else:
            bbox = bboxsearch(sjson)
        xs = sum([bbox[0], bbox[2]]) / 2.
        ys = sum([bbox[1], bbox[3]]) / 2.
        centroid = [ys, xs]
    jsonkey = 'feature.properties.' + key

    choromap = fm.Map(location=centroid, zoom_start=zoom_start,
                      tiles=tiles)  # all the elements you need to make a choropleth

    # standardization
    if std is not None:
        if isinstance(std, (int, float)):
            y = np.array(df[attribute] / std)
        elif isinstance(std, str):
            y = np.array(df[attribute] / df[std])
        elif callable(std):
            raise NotImplementedError('Functional standardizations are not implemented yet')
        else:
            raise ValueError('Standardization must be integer, float, function, or Series')
    else:
        y = np.array(df[attribute].tolist())

    # For people who don't read documentation...
    if isinstance(classes, list):
        bins = classes
        classes = len(bins)
    elif isinstance(classes, float):
        try:
            classes = int(classes)
        except (TypeError, ValueError):
            raise ValueError('Classes must be coercible to integers')

    # classification passing
    if classification is not None:
        if classification == 'Maximum Breaks':
            # there is probably a better way to do this, but it's a start.
            mapclass = ps.Maximum_Breaks(y, k=classes).bins.tolist()
        elif classification == 'Quantiles':
            mapclass = ps.Quantiles(y, k=classes).bins.tolist()
        elif classification == 'Fisher-Jenks':
            mapclass = ps.Fisher_Jenks(y, k=classes).bins
        elif classification == 'Equal Interval':
            mapclass = ps.Equal_Interval(y, k=classes).bins.tolist()
        elif classification == 'Natural Breaks':
            mapclass = ps.Natural_Breaks(y, k=classes).bins
        elif classification == 'Jenks Caspall Forced':
            raise NotImplementedError('Jenks Caspall Forced is not implemented yet.')
            # mapclass = ps.Jenks_Caspall_Forced(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall Sampled':
            raise NotImplementedError('Jenks Caspall Sampled is not implemented yet')
            # mapclass = ps.Jenks_Caspall_Sampled(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall':
            mapclass = ps.Jenks_Caspall(y, k=classes).bins.tolist()
        elif classification == 'User Defined':
            mapclass = bins
        elif classification == 'Standard Deviation':
            if bins is None:
                l = classes // 2
                bins = range(-l, l + 1)
            mapclass = list(ps.Std_Mean(y, bins).bins)
        elif classification == 'Percentiles':
            if bins is None:
                bins = [1, 10, 50, 90, 99, 100]
            mapclass = list(ps.Percentiles(y, bins).bins)
        elif classification == 'Max P':
            mapclass = ps.Max_P_Classifier(y, k=classes).bins.tolist()
        else:
            raise NotImplementedError(
                'Your classification is not supported or was not found. '
                'Supported classifications are:\n "Maximum Breaks"\n "Quantiles"\n '
                '"Fisher-Jenks"\n "Equal Interval"\n "Natural Breaks"\n '
                '"Jenks Caspall"\n "User Defined"\n "Standard Deviation"\n '
                '"Percentiles"\n "Max P"')
    else:
        print('Classification forced to None. Defaulting to Quantiles')
        mapclass = ps.Quantiles(y, k=classes).bins.tolist()

    # folium call; could be abstracted to a "mapper" function taking a list of args
    choromap.geo_json(geo_path=jsonpath, key_on=jsonkey,
                      data=df, columns=dfkey,
                      fill_color=fill_color, fill_opacity=fill_opacity,
                      line_opacity=line_opacity,
                      threshold_scale=mapclass[:-1],
                      legend_name=legend_name)

    if save:
        # os.path.splitext strips the extension correctly; rstrip('.json')
        # would strip characters, not the suffix
        fname = os.path.splitext(jsonpath)[0] + '_' + attribute + '.html'
        choromap.save(fname)
    return choromap
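
# Example call, with hypothetical file and field names: a quantile choropleth
# of a 'population' attribute keyed on 'GEOID', written to
# counties_population.html next to the input file. Kept commented so the
# module stays importable without the example data.
#
# m = choropleth_map('counties.geojson', 'GEOID', 'population',
#                    classification='Quantiles', classes=5,
#                    legend_name='Population')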
def createClassifyMap(self, map_type):
    """Classify self.data with the requested pysal map classifier and return
    the (id_group, label_group, color_group) triplet used to draw the map.

    Assumes `import pysal` and the project's `stars` constants module at file
    scope, plus helper methods defined elsewhere on this class
    (_get_label_group_by_k, _get_range_labels, pick_color_set).
    """
    id_group = []
    color_group = []
    label_group = []

    if map_type == stars.MAP_CLASSIFY_EQUAL_INTERVAL:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Equal_Interval(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_PERCENTILES:
        pct = [1, 10, 50, 90, 99, 100]
        # user-defined pct values are not supported:
        # if "pct" in self.params:
        #     pct = self.params["pct"]
        cm = pysal.Percentiles(self.data, pct=pct)
        counts = list(cm.counts)
        n_counts = len(counts)
        if n_counts < 6:
            # pad so all six percentile labels can be built
            for i in range(6 - n_counts):
                counts.append(0)
        label_group = ['<1%%(%d)' % counts[0],
                       '1%% - 10%%(%d)' % counts[1],
                       '10%% - 50%%(%d)' % counts[2],
                       '50%% - 90%%(%d)' % counts[3],
                       '90%% - 99%%(%d)' % counts[4],
                       '>99%%(%d)' % counts[5]]
        color_group = self.pick_color_set(3, 6, True)

    elif map_type == stars.MAP_CLASSIFY_BOX_PLOT:
        hinge = 1.5  # default
        if "hinge" in self.params:
            hinge = self.params["hinge"]
        cm = pysal.Box_Plot(self.data, hinge=hinge)
        n_bins = len(cm.bins)
        if n_bins == 5:
            n_upper_outlier = 0
        else:
            n_upper_outlier = cm.counts[5]
        label_group = ['Lower outlier(%d)' % cm.counts[0],
                       '<25%% (%d)' % cm.counts[1],
                       '25%% - 50%% (%d)' % cm.counts[2],
                       '50%% - 75%% (%d)' % cm.counts[3],
                       '>75%% (%d)' % cm.counts[4],
                       'Upper outlier (%d)' % n_upper_outlier]
        color_group = self.pick_color_set(2, 6, False)

    elif map_type == stars.MAP_CLASSIFY_QUANTILES:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Quantiles(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_STD_MEAN:
        cm = pysal.Std_Mean(self.data, multiples=[-2, -1, 0, 1, 2])
        n_bins = len(cm.bins)
        # labels and colors come from the common branch below

    elif map_type == stars.MAP_CLASSIFY_MAXIMUM_BREAK:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Maximum_Breaks(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_NATURAL_BREAK:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Natural_Breaks(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_FISHER_JENKS:
        cm = pysal.Fisher_Jenks(self.data)
        # see below: common label group and color group

    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Jenks_Caspall(self.data, k=k)
        # Jenks_Caspall bins are nested arrays, hence the [i[0] for i in ...]
        label_group = self._get_label_group_by_k([i[0] for i in cm.bins], cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_SAMPLED:
        k = 5  # default
        pct = 0.1
        if "k" in self.params:
            k = self.params["k"]
        if "pct" in self.params:
            pct = self.params["pct"]
        cm = pysal.Jenks_Caspall_Sampled(self.data, k=k, pct=pct)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_FORCED:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Jenks_Caspall_Forced(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_USER_DEFINED:
        assert "bins" in self.params
        bins = self.params["bins"]
        cm = pysal.User_Defined(self.data, bins=bins)
        k = len(bins)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_MAX_P:
        k = 5  # default
        if "k" in self.params:
            k = self.params["k"]
        cm = pysal.Max_P_Classifier(self.data, k=k)
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        color_group = self.pick_color_set(1, len(cm.bins), False)

    elif map_type == stars.MAP_CLASSIFY_UNIQUE_VALUES:
        # Group observations by value; at most 10 distinct values get their
        # own class, everything else is pooled into 'Others'
        id_group_dict = {}
        id_other = []
        n = 0
        for i, item in enumerate(self.data):
            if n < 10:
                if item not in id_group_dict:
                    id_group_dict[item] = []
                    n += 1
            if item in id_group_dict:
                id_group_dict[item].append(i)
            else:
                id_other.append(i)
        id_group = list(id_group_dict.values())
        unique_values = list(id_group_dict.keys())
        max_num_values = n if n <= 10 else 10
        label_group = [str(unique_values[i]) for i in range(max_num_values)]
        color_group = [stars.MAP_COLOR_12_UNIQUE_FILL[i]
                       for i in range(max_num_values)]
        if n >= 10:
            id_group.append(id_other)
            label_group.append('Others')
            color_group.append(stars.MAP_COLOR_12_UNIQUE_OTHER)
        field_name = self.params['field_name']
        id_group.insert(0, [])
        label_group.insert(0, field_name)
        color_group.insert(0, None)

    else:
        raise KeyError('Classify map type is illegal')

    # common label group and color group
    if map_type in [stars.MAP_CLASSIFY_FISHER_JENKS, stars.MAP_CLASSIFY_STD_MEAN]:
        label_group = self._get_range_labels(cm.bins, cm.counts)
        color_group = self.pick_color_set(3, len(cm.bins), True)  # [1:]

    if map_type != stars.MAP_CLASSIFY_UNIQUE_VALUES:
        # convert cm.yb (class membership per observation) into per-class
        # lists of observation ids
        binIds = cm.yb
        bins = cm.bins
        n_group = len(bins)
        id_group = [[] for i in range(n_group)]
        for i, gid in enumerate(binIds):
            id_group[gid].append(i)

    return id_group, label_group, color_group