def get_source_geo_and_count_us_continent(state_count_experience_selector_dict,
                                          state_count_careerarea_selector_dict,
                                          states_map):
    """Build the ColumnDataSource holding state outlines and per-selector counts.

    Args:
        state_count_experience_selector_dict: Mapping of an experience selector
            name to a sequence of counts. Each sequence is stored as the column
            ``'count' + name``.
        state_count_careerarea_selector_dict: Mapping of an experience selector
            name to a mapping of career-area name to a sequence of counts. Each
            sequence is stored as ``'count' + experience_name + careerarea_name``.
        states_map: Mapping of state code to a dict with ``'name'``, ``'lons'``
            and ``'lats'`` entries.

    Returns:
        A ColumnDataSource with columns ``x``, ``y``, ``name``, one column per
        selector as described above, and ``count_all`` / ``count`` holding the
        element-wise total over all experience selectors.
    """
    # Order the geometry by state name. The count sequences are presumably
    # already in that order (the source data is described as sorted by state
    # name) -- verify against the caller.
    states_by_name = sorted(states_map.values(), key=lambda state: state['name'])
    state_names = [state["name"] for state in states_by_name]

    source = ColumnDataSource(data=dict(
        x=[state["lons"] for state in states_by_name],
        y=[state["lats"] for state in states_by_name],
        name=[name + ", United States" for name in state_names],
    ))

    # Size the running total from the first count sequence. With no experience
    # selectors at all, fall back to one zero per state instead of raising
    # IndexError.
    experience_counts = list(state_count_experience_selector_dict.values())
    if experience_counts:
        total_length = len(experience_counts[0])
    else:
        total_length = len(state_names)
    state_count_total = np.zeros(total_length)

    for name, state_count in state_count_experience_selector_dict.items():
        state_count_total = [
            total + count
            for total, count in zip(state_count_total, state_count)
        ]
        source.add(data=state_count, name='count' + name)

    # Give each column its own list so an in-place update of one column cannot
    # silently change the other.
    state_count_total = list(state_count_total)
    source.add(data=state_count_total, name='count_all')
    source.add(data=list(state_count_total), name='count')

    for experience_selector_name, state_count_careerarea in (
            state_count_careerarea_selector_dict.items()):
        column_prefix = 'count' + experience_selector_name
        for careerarea_name, state_count in state_count_careerarea.items():
            source.add(data=state_count, name=column_prefix + careerarea_name)

    return source
def create_us_state_map(scores):
    """Create a choropleth figure of U.S. states coloured by score.

    Alaska and Hawaii are left out of the map. States without an entry in
    ``scores`` are drawn with a score of 0.

    Args:
        scores: Mapping of state code to a numeric score.

    Returns:
        The Bokeh figure containing the state patches.
    """
    from bokeh.sampledata.us_states import data as us_states

    states = {
        code: state
        for code, state in us_states.items() if code not in ('AK', 'HI')
    }

    state_xs = [state["lons"] for state in states.values()]
    state_ys = [state["lats"] for state in states.values()]
    state_names = [state['name'] for state in states.values()]
    state_scores = [scores.get(code, 0) for code in states]

    teal_palette = [
        '#ffffff', '#e0f2f1', '#b2dfdb', '#80cbc4', '#4db6ac', '#26a69a',
        '#009688', '#00897b', '#00796b', '#00695c'
    ]

    # The upper bound comes from every supplied score, including codes that are
    # not drawn. With no scores at all, fall back to 1.0 so an all-zero map is
    # rendered instead of max() raising ValueError on an empty sequence.
    color_mapper = LogColorMapper(palette=teal_palette,
                                  low=0.01,
                                  high=max(scores.values(), default=1.0))

    data = dict(
        x=state_xs,
        y=state_ys,
        name=state_names,
        rate=state_scores,
    )

    TOOLS = "pan,wheel_zoom,reset,hover,save"

    p = figure(title="NLP Ranking Scores Across U.S. States",
               tools=TOOLS,
               x_axis_location=None,
               y_axis_location=None,
               sizing_mode="scale_width",
               plot_width=1100,
               plot_height=700,
               tooltips=[("State", "@name"), ("Score", "@rate{0,0.00}")])
    p.grid.grid_line_color = None
    p.hover.point_policy = "follow_mouse"

    p.patches('x',
              'y',
              source=data,
              fill_color={
                  'field': 'rate',
                  'transform': color_mapper
              },
              fill_alpha=0.7,
              line_color="black",
              line_width=0.5)
    return p
def get_States_Sal(states_dict):
    """Build a ColumnDataSource of state outlines plus average salary columns.

    Args:
        states_dict: Mapping of state code to a dict with ``'name'``, ``'lons'``
            and ``'lats'`` entries.

    Returns:
        A ColumnDataSource with the outline columns ``x``, ``y`` and ``stateN``
        (ordered by state name) and the salary columns ``statecode``,
        ``salAvg``, ``salRealAvg`` (all stringified) and ``type_color``.
    """
    # Salary records come from get_data().
    salStateData = get_data()

    # Outline geometry, ordered by state name.
    states_Name = sorted(states_dict.values(), key=lambda x: x['name'])
    state_xs = [state["lons"] for state in states_Name]
    state_ys = [state["lats"] for state in states_Name]
    state_names = [state["name"] for state in states_Name]

    source = ColumnDataSource(
        data=dict(x=state_xs, y=state_ys, stateN=state_names))

    # Mean of both salary measures per state. The columns are selected with a
    # list because tuple selection on a GroupBy is rejected by current pandas.
    # NOTE(review): the outline columns are ordered by state name, while these
    # rows follow the groupby key order (STATECODE first) -- confirm the two
    # orderings and lengths actually line up.
    salStateAgg = salStateData.groupby(
        ['STATECODE', 'stateName'],
        as_index=False)[['SALARYAVERAGE', 'SALARYREALTIMEAVERAGE']].mean()

    # One colour per distinct real-time average.
    keys = pd.unique(salStateAgg["SALARYREALTIMEAVERAGE"])
    values = (
        "#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941",
        "#006FA6", "#A30059", "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC",
        "#B79762", "#004D43", "#8FB0FF", "#997D87", "#5A0007", "#809693",
        "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
        "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9",
        "#B903AA", "#D16100", "#DDEFFF", "#000035", "#7B4F4B", "#A1C299",
        "#300018", "#0AA6D8", "#013349", "#00846F", "#372101", "#FFB500",
        "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
        "#00489C", "#6F0062", "#0CBD66", "#EEC3FF",
    )
    # zip() replaces the Python-2-only itertools.izip. Cycling the palette keeps
    # the lookup below from raising KeyError when there are more distinct
    # values than colours.
    colorMap = dict(zip(keys, itertools.cycle(values)))

    # Attach the salary columns to the source.
    source.add(data=[str(x) for x in salStateAgg["STATECODE"]],
               name='statecode')
    source.add(data=[str(x) for x in salStateAgg["SALARYAVERAGE"]],
               name='salAvg')
    source.add(data=[str(x) for x in salStateAgg["SALARYREALTIMEAVERAGE"]],
               name='salRealAvg')
    source.add(
        data=[colorMap[x] for x in salStateAgg["SALARYREALTIMEAVERAGE"]],
        name='type_color')
    return source
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.us_states import data as states
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.palettes import PRGn11 as palette
from bokeh.palettes import Category20, Spectral11, Category10, PRGn11

# Drop Hawaii and Alaska from the sample data. pop() with a default removes
# each state independently and is a no-op when the entry is already gone
# (for example on a re-run), so no exception needs to be swallowed.
for _code in ("HI", "AK"):
    states.pop(_code, None)

# Use a reversed copy rather than reversing in place: the imported palette is
# shared library data (and may be an immutable tuple), and an in-place reverse
# would flip back on every re-run.
palette = list(palette)[::-1]

# Work on a private shallow copy of the remaining states.
states = dict(states)

state_xs = [states[code]["lons"] for code in states]
state_ys = [states[code]["lats"] for code in states]
state_names = [state['name'] for state in states.values()]

# Look up each state's slope via its abbreviation.
state_rates = []
for name in state_names:
    abbr = state_pop.loc[name]["State Abb"]
    slope = df_state_slope.loc[df_state_slope["state"] == abbr, "slope"]
    # item() requires exactly one matching row, like the float(Series)
    # conversion it replaces, but without relying on that deprecated conversion.
    state_rates.append(float(slope.item()))

lat_inkm = 111.132  # at around lat = 45 degrees, from the wiki latitude page
lon_inkm = 78.847  # at around lat = 45 degrees, from the wiki latitude page
# To visualize the data on a map easily, drop the non-continental states and
# territories: first from the Bokeh map coordinates, then from the dataframe
# df2.
import collections

from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.us_states import data as states

# Remove Hawaii and Alaska from the Bokeh coordinate dictionary. pop() with a
# default keeps this safe to re-run after the entries have already been
# removed.
states.pop("HI", None)
states.pop("AK", None)

EXCLUDED = ("ak", "hi", "pr", "gu", "vi", "mp", "as")  # Exclude territories

# Order the states by code so the coordinates can be matched with the data.
ordStates = collections.OrderedDict(sorted(states.items()))

# Exclude Hawaii, Alaska and the territories from the dataframe. There is also
# an 'NA' column for some reason; drop that too.
df3 = df2.drop(['HI', 'AK', 'PR', 'GU', 'VI', 'MP', 'AS', 'NA'], axis=1)

# Count the number of tweets in each state in df3 and calculate the mean tweet
# score for every column/state (NaNs are ignored, but the result is NaN when a
# state's list was empty).
dfCount = df3.count()
dfMean = df3.mean()

# There are now three objects: df3 = filtered data, dfCount = tweet count for
# each state in df3, dfMean = mean tweet score for each state in df3. They are
# used to build the map and to scale and normalize the tweet sentiment score
# data. Some of the values are negative, which is accounted for as well.
# First, convert this to a dictionary to work with just the values, and order
# the dictionary to match ordStates.
from bokeh.models.widgets import Tabs #Import scripts for tabs from scripts.line import line_tab from scripts.map_bar import map_bar_tab #Import data from bokeh.sampledata.us_states import data as States measles = pd.read_csv("measles.csv") #Create columns for Year and Week_Number measles["year"] = measles["week"].apply(lambda x: int(str(x)[0:4])) measles["week_num"] = measles["week"].apply(lambda x: int(str(x)[4:7])) measles.drop("week", axis = 1, inplace = True) #Create states lookup dict states = { state["name"].upper(): state for code, state in States.items() if state["name"] not in ["Hawaii", "Alaska"] } #Define function for summarising data def summarise(df, group_by): #Group data grouped = df.groupby(by = group_by) #Summarise data as Series then convert back to Dataframe cases_sum = pd.DataFrame(grouped["cases"].sum()).reset_index() cases_avg = pd.DataFrame(grouped["cases"].mean()).reset_index() avg_incidence_year = pd.DataFrame(grouped["incidence_per_capita"].mean()).reset_index() #Give columns sensible names avg_incidence_year = avg_incidence_year.rename(columns = {"incidence_per_capita": "avg_incidence_per_week"}) cases_sum = cases_sum.rename(columns = {"cases": "total_cases_per_year"}) cases_avg = cases_avg.rename(columns = {"cases": "avg_cases_per_week"}) #Merge dataframes
from bokeh.plotting import figure, output_file, show from bokeh.sampledata.airport_routes import airports, routes from bokeh.sampledata.us_states import data as us_states output_file("graphs.html") airports.set_index("AirportID", inplace=True) airports.index.rename("index", inplace=True) routes.rename(columns={ "SourceID": "start", "DestinationID": "end" }, inplace=True) lats, lons = [], [] for k, v in us_states.items(): lats.append(np.array(v['lats'])) lons.append(np.array(v['lons'])) source = ColumnDataSource(data=dict(lats=lats, lons=lons)) graph_layout = dict( zip(airports.index.astype(str), zip(airports.Longitude, airports.Latitude))) layout_provider = StaticLayoutProvider(graph_layout=graph_layout) fig = figure(x_range=(-180, -60), y_range=(15, 75), x_axis_label="Longitude", y_axis_label="Latitude", plot_width=800,
import time

import numpy as np

from bokeh.io import curdoc
from bokeh.models import HoverTool, HBox, VBox, Slider, Toggle
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.sampledata.us_states import data as states
from bokeh.palettes import Purples9

# Keep every state except Hawaii and Alaska.
states = dict(
    (code, state)
    for code, state in states.items()
    if code != 'HI' and code != 'AK'
)


def gen_initial_rate(y):
    """Return a random starting rate, capped at 100.

    The rate is a base of 15 or 40 plus a uniform offset in [-10, 10).
    The argument ``y`` is not used.
    """
    base = np.random.choice([15, 40])
    offset = np.random.uniform(-10, 10)
    return min(base + offset, 100)


# Outline coordinates and display names, in the dict's iteration order.
state_xs, state_ys, names = [], [], []
for state in states.values():
    state_xs.append(state['lons'])
    state_ys.append(state['lats'])
    names.append(state['name'])

colors = Purples9[::-1]

# One random rate per state, and the colour bucket (20 units wide) it falls in.
initial_rates = [gen_initial_rate(1) for _ in range(len(states))]
state_colors = [colors[int(value / 20)] for value in initial_rates]

source = ColumnDataSource(data={
    'x': state_xs,
    'y': state_ys,
    'color': state_colors,
    'name': names,
    'rate': initial_rates,
})
import time import numpy as np from bokeh.io import curdoc from bokeh.models import HoverTool, HBox, VBox, Slider, Toggle from bokeh.plotting import figure, show, ColumnDataSource from bokeh.sampledata.us_states import data as states from bokeh.palettes import Purples9 states = { code: state for code, state in states.items() if code not in ['HI', 'AK'] } def gen_initial_rate(y): return min( np.random.choice([15, 40]) + np.random.uniform(-10, 10), 100 ) state_xs = [state['lons'] for state in states.values()] state_ys = [state['lats'] for state in states.values()] colors = Purples9[::-1] names = [state['name'] for state in states.values()] initial_rates = [gen_initial_rate(1) for _ in states.values()] state_colors = [colors[int(rate / 20)] for rate in initial_rates] source = ColumnDataSource(data=dict( x=state_xs,
from bokeh.models.graphs import NodesAndLinkedEdges
from bokeh.palettes import Set3_12
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.us_states import data as us_states
from bokeh.sampledata.airport_routes import airports, routes

import numpy as np

output_file("graphs.html")

# Re-key the airports by AirportID under an index named "index", and rename
# the route endpoint columns to "start" / "end" (all in place).
airports.set_index("AirportID", inplace=True)
airports.index.rename("index", inplace=True)
routes.rename(columns={"SourceID": "start", "DestinationID": "end"},
              inplace=True)

# State outlines as one array of latitudes and one of longitudes per state.
lats = [np.array(outline['lats']) for outline in us_states.values()]
lons = [np.array(outline['lons']) for outline in us_states.values()]

source = ColumnDataSource(data={"lats": lats, "lons": lons})

# Static node layout: airport id (as a string) -> (longitude, latitude).
node_ids = airports.index.astype(str)
node_positions = zip(airports.Longitude, airports.Latitude)
graph_layout = dict(zip(node_ids, node_positions))
layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

fig = figure(
    x_range=(-180, -60),
    y_range=(15, 75),
    x_axis_label="Longitude",
    y_axis_label="Latitude",
    plot_width=800,
    plot_height=600,
    background_fill_color=Set3_12[4],
    background_fill_alpha=0.2,
    tools='box_zoom,reset',
)

# Draw the state outlines.
fig.patches(
    xs="lons",
    ys="lats",
    line_color='grey',
    line_width=1.0,
    fill_color=Set3_12[10],
    source=source,
)
df.describe() # now it is the time to do the mapping state_df = df.groupby(df['state'])['deaths_and_injuries'].sum() print(state_df.head(n=5)) colors = bokeh.palettes.OrRd5[::-1] color_mapper = bokeh.models.mappers.LinearColorMapper(palette=colors) state_dict = state_df.to_dict() # state leven information new_state_xs = [] new_state_ys = [] state_name = [] state_count = [] for abbr, state in statesData.items(): new_state_xs.append(state['lons']) new_state_ys.append(state['lats']) state_name.append(state['name']) state_count.append(state_dict.get(state['name'], 0)) incident_data_source = bokeh.models.sources.ColumnDataSource(df) state_data_source = bokeh.models.sources.ColumnDataSource( data=dict(x=new_state_xs, y=new_state_ys, color=state_count)) plot = figure(title="School shooting", plot_width=800, plot_height=500) plot.patches('x', 'y', source=state_data_source, color={ 'field': 'color',
def make_dataset(self):
    """Assemble the map data source for the selected data type and date.

    The data type is the lower-cased label of the active ``data_getter``
    option, the per-capita flag is ``per_capita.active == 1`` and the date is
    ``date.value``.

    Returns:
        A tuple ``(label, maxval, source)``: the display label, the maximum of
        the plotted column over the whole dataset, and a ColumnDataSource with
        ``color``, ``value``, ``state``, ``lons`` and ``lats`` columns holding
        one entry per state in ``US_STATES``.

    Raises:
        ValueError: If the selected data type is not "cases", "deaths" or
            "positivity".
    """
    per_capita = self.per_capita.active == 1
    data_type = self.data_getter.labels[self.data_getter.active].lower()
    date = self.date.value

    def first_non_negative(values):
        """Return the first entry clamped at zero, or 0 if absent or NaN."""
        if values.empty or np.isnan(values.values[0]):
            return 0
        return max(0, values.values[0])

    if data_type in ("cases", "deaths"):
        if per_capita:
            dt_label = f"{data_type}_pc"
            label = f"New {data_type.title()} per 100,000"
        else:
            dt_label = data_type
            label = f"Total New {data_type.title()}"
        column = f"avg_{dt_label}"

        # This dataset is matched on the state's full name.
        subset = GH_STATES_DATA.loc[GH_STATES_DATA["date"] == date, :]
        data = np.array(
            [
                first_non_negative(
                    subset.loc[subset["state"] == state["name"], column])
                for state in US_STATES.values()
            ],
            dtype=float,
        )
        maxval = GH_STATES_DATA.loc[:, column].max()
    elif data_type == "positivity":
        label = "Positivity (%)"

        # This dataset is matched on the upper-cased state abbreviation.
        subset = TRACKING_DATA.loc[TRACKING_DATA["datetime"] == date,
                                   ["state", "positivity"]]
        data = np.array(
            [
                first_non_negative(
                    subset.loc[subset["state"] == abbrv.upper(), "positivity"])
                for abbrv in US_STATES
            ],
            dtype=float,
        )
        maxval = TRACKING_DATA.loc[:, "positivity"].max()
    else:
        # Fail with a clear message here instead of an UnboundLocalError on
        # `label` / `maxval` further down.
        raise ValueError(f"Unsupported data type: {data_type!r}")

    # Only the logarithmic palette is used; a linear variant
    # (compute_linear_palette) is the alternative that is not wired in.
    interp = compute_log_palette

    color_data = {
        "color": [interp(PALETTE, maxval / 256, maxval, val) for val in data],
        "value": data,
        "state": [state["name"] for state in US_STATES.values()],
        "lons": [state["lons"] for state in US_STATES.values()],
        "lats": [state["lats"] for state in US_STATES.values()],
    }

    return label, maxval, ColumnDataSource(color_data)
import collections

from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.palettes import RdBu11, RdBu10, BrBG11, Blues9
from bokeh.plotting import figure, curdoc
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Select, Paragraph, Slider, Div
from bokeh.sampledata.us_states import data as states
from bokeh.themes import built_in_themes

import numpy as np
import pandas as pd

## Load and process data

# State outlines ordered by state code, without DC, Alaska and Hawaii.
states = collections.OrderedDict(
    sorted(states.items(), key=lambda item: item[0]))
del states['DC']
del states['AK']
del states['HI']

# One frame per year, 2013 through 2017, with Alaska and Hawaii filtered out
# and the remaining rows renumbered 0..47.
dfs = {}
for year in range(2013, 2018):
    tmp_df = pd.read_csv(
        f'data/{year}_processed.csv',
        usecols=['State', 'Year', 'Median AQI', 'prcp', 'tmax'],
    )
    tmp_df = tmp_df.loc[lambda frame: ~frame['State'].isin(['AK', 'HI'])]
    tmp_df.index = list(range(48))
    dfs[year] = tmp_df

# Stack the yearly frames; the dict keys become the outer index level "year".
df = pd.concat(dfs, names=['year'])

data_2018 = {}