import numpy as np

from bokeh.models import ColumnDataSource


def get_source_geo_and_count_us_continent(state_count_experience_selector_dict,
                                          state_count_careerarea_selector_dict,
                                          states_map):
    # Sort state geometries by state name so their order matches the count data,
    # since the .csv data is already sorted by state name.
    states_sort_by_name = sorted(states_map.values(), key=lambda x: x['name'])

    state_xs = [state["lons"] for state in states_sort_by_name]
    state_ys = [state["lats"] for state in states_sort_by_name]
    state_names = [state["name"] for state in states_sort_by_name]

    source = ColumnDataSource(
        data=dict(x=state_xs,
                  y=state_ys,
                  name=[name + ", United States" for name in state_names]))

    state_count_total = np.zeros(
        len(list(state_count_experience_selector_dict.values())[0]))
    for name, state_count in state_count_experience_selector_dict.items():
        state_count_total = [
            sum(x) for x in zip(state_count_total, state_count)
        ]
        source.add(data=state_count, name='count' + name)
    source.add(data=state_count_total, name='count_all')
    source.add(data=state_count_total, name='count')

    for experience_selector_name, state_count_careerarea in state_count_careerarea_selector_dict.items(
    ):
        careerarea_selector_name_initial = 'count' + experience_selector_name
        for careerarea_name, state_count in state_count_careerarea.items():
            source.add(data=state_count,
                       name=careerarea_selector_name_initial + careerarea_name)

    return source
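
# A minimal usage sketch (an assumption, not part of the original snippet): the
# returned ColumnDataSource exposes the 'x'/'y' patch coordinates plus the
# per-state 'count*' columns, so it can drive a Bokeh patches glyph directly, e.g.:
#
#     source = get_source_geo_and_count_us_continent(exp_counts,        # hypothetical inputs
#                                                     careerarea_counts,
#                                                     states_map)
#     p = figure(title="Job postings by state",
#                x_axis_location=None, y_axis_location=None,
#                tooltips=[("State", "@name"), ("Postings", "@count")])
#     p.patches('x', 'y', source=source, fill_alpha=0.7, line_color="white")
#     show(p)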
Example #2
def create_us_state_map(scores):
    from bokeh.models import LogColorMapper
    from bokeh.plotting import figure
    from bokeh.sampledata.us_states import data as states

    # Drop Alaska and Hawaii so only the continental states are drawn
    states = {
        code: state
        for code, state in states.items() if code not in ['AK', 'HI']
    }

    state_xs = [state["lons"] for state in states.values()]
    state_ys = [state["lats"] for state in states.values()]

    teal_palette = [
        '#ffffff', '#e0f2f1', '#b2dfdb', '#80cbc4', '#4db6ac', '#26a69a',
        '#009688', '#00897b', '#00796b', '#00695c'
    ]

    state_names = [state['name'] for state in states.values()]
    state_scores = [scores.get(code, 0) for code in states]
    color_mapper = LogColorMapper(palette=teal_palette,
                                  low=0.01,
                                  high=max(scores.values()))

    data = dict(
        x=state_xs,
        y=state_ys,
        name=state_names,
        rate=state_scores,
    )

    TOOLS = "pan,wheel_zoom,reset,hover,save"

    p = figure(title="NLP Ranking Scores Across U.S. States",
               tools=TOOLS,
               x_axis_location=None,
               y_axis_location=None,
               sizing_mode="scale_width",
               plot_width=1100,
               plot_height=700,
               tooltips=[("State", "@name"), ("Score", "@rate{0,0.00}")])
    p.grid.grid_line_color = None
    p.hover.point_policy = "follow_mouse"

    p.patches('x',
              'y',
              source=data,
              fill_color={
                  'field': 'rate',
                  'transform': color_mapper
              },
              fill_alpha=0.7,
              line_color="black",
              line_width=0.5)

    return p
Example #3
def get_States_Sal(states_dict):
    # call get_data() to fetch the salary data
    salStateData = get_data()

    # extract states lat and lon information for generating map
    states = {code: state for code, state in states_dict.items()}

    # sort the data by state names
    states_Name = sorted(states.values(), key=lambda x: x['name'])

    state_xs = [state["lons"] for state in states_Name]
    state_ys = [state["lats"] for state in states_Name]
    state_names = [state["name"] for state in states_Name]

    # create column data source
    source = ColumnDataSource(data=dict(
        x=state_xs, y=state_ys, stateN=state_names))

    # average the salary measures for each state
    salStateAgg = salStateData.groupby(
        ['STATECODE', 'stateName'],
        as_index=False)[['SALARYAVERAGE',
                         'SALARYREALTIMEAVERAGE']].mean().reset_index()

    # Create colorMap dictionary
    keys = tuple(pd.unique(salStateAgg["SALARYREALTIMEAVERAGE"]))
    values = tuple([
        "#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941",
        "#006FA6", "#A30059", "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC",
        "#B79762", "#004D43", "#8FB0FF", "#997D87", "#5A0007", "#809693",
        "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
        "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9",
        "#B903AA", "#D16100", "#DDEFFF", "#000035", "#7B4F4B", "#A1C299",
        "#300018", "#0AA6D8", "#013349", "#00846F", "#372101", "#FFB500",
        "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
        "#00489C", "#6F0062", "#0CBD66", "#EEC3FF"
    ])

    colorMap = dict(zip(keys, values))  # itertools.izip is Python 2 only

    # add values to the source
    source.add(data=[str(x) for x in salStateAgg["STATECODE"]],
               name='statecode')
    source.add(data=[str(x) for x in salStateAgg["SALARYAVERAGE"]],
               name='salAvg')
    source.add(data=[str(x) for x in salStateAgg["SALARYREALTIMEAVERAGE"]],
               name='salRealAvg')
    source.add(
        data=[colorMap[x] for x in salStateAgg["SALARYREALTIMEAVERAGE"]],
        name='type_color')

    return source
Example #4
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.us_states import data as states
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.palettes import PRGn11 as palette
from bokeh.palettes import Category20, Spectral11, Category10, PRGn11

try:
    del states["HI"]
    del states["AK"]
except KeyError:
    pass

palette = palette[::-1]  # reversed copy; works whether the palette is a list or a tuple

states = {code: state for code, state in states.items()}
#print(states)

state_xs = [states[code]["lons"] for code in states]
state_ys = [states[code]["lats"] for code in states]
state_names = [state['name'] for state in states.values()]

state_rates = []
for name in state_names:
    abbr = state_pop.loc[name]["State Abb"]
    state_rates.append(
        float(df_state_slope[df_state_slope["state"] == abbr]["slope"]))
#print(state_rates)

lat_inkm = 111.132  # km per degree of latitude near 45 degrees (Wikipedia "Latitude")
lon_inkm = 78.847   # km per degree of longitude near 45 degrees (Wikipedia "Latitude")
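
# A small hedged illustration (added, not from the original snippet): with these
# per-degree scale factors, an approximate planar distance in km between two
# nearby points around 45 degrees latitude can be computed as follows.
def approx_km(lat1, lon1, lat2, lon2):
    # Scale each degree offset to km, then take the Euclidean norm.
    dlat_km = (lat2 - lat1) * lat_inkm
    dlon_km = (lon2 - lon1) * lon_inkm
    return (dlat_km ** 2 + dlon_km ** 2) ** 0.5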
Example #5
# In order to visualize the data on a map easily, we'll drop the non-continental states and territories: first from Bokeh's map-coordinate dictionary, then from the dataframe df2.


# First exclude them from the Bokeh coordinate dictionary and sort the remaining states alphabetically
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.us_states import data as states

del states["HI"]
del states["AK"]

EXCLUDED = ("ak", "hi", "pr", "gu", "vi", "mp", "as")  # Exclude Alaska, Hawaii, and territories

import collections  # OrderedDict lets us order the states so the coordinate data lines up with our dataframe

ordStates = collections.OrderedDict(sorted(states.items()))

# Now exclude Hawaii, Alaska, and the territories from our dataframe
df3 = df2.drop(['HI', 'AK', 'PR', 'GU', 'VI', 'MP', 'AS', 'NA'], axis=1)  # a stray 'NA' column shows up as well, so drop it too


# Now we'll count the number of tweets in each state in df3 and calculate the mean tweet score for each column/state/series (NaN's are ignored, but the output is NaN if a state's list was empty).

dfCount = df3.count()
dfMean = df3.mean()

# So now we have three dataframes: df3 = filtered data, dfCount = tweet count for each state in df3, dfMean = mean tweet score for each state in df3. We'll use these to build our map and to scale and normalize our tweet sentiment scores.
# Since some of our values are negative, we'll account for that as well.


# First I'll convert the means to a dictionary so we can work with just the values, ordered to match ordStates
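# A possible continuation (an assumption; the original snippet is truncated here):
# convert the per-state means to a dict keyed by state code, order it to match
# ordStates, and min-max scale the scores so that negative values land in [0, 1].
meanDict = dfMean.to_dict()
ordMeans = collections.OrderedDict(
    (code, meanDict.get(code)) for code in ordStates)
valid = [v for v in ordMeans.values() if v is not None and v == v]  # drop missing/NaN
lowVal, highVal = min(valid), max(valid)
normMeans = collections.OrderedDict(
    (code, (v - lowVal) / (highVal - lowVal) if v is not None and v == v else 0.0)
    for code, v in ordMeans.items())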
Example #6
import pandas as pd

from bokeh.models.widgets import Tabs

#Import scripts for tabs
from scripts.line import line_tab
from scripts.map_bar import map_bar_tab

#Import data
from bokeh.sampledata.us_states import data as States
measles = pd.read_csv("measles.csv")
#Create columns for Year and Week_Number
measles["year"] = measles["week"].apply(lambda x: int(str(x)[0:4]))
measles["week_num"] = measles["week"].apply(lambda x: int(str(x)[4:7]))
measles.drop("week", axis = 1, inplace = True)
#Create states lookup dict
states = {
        state["name"].upper(): state for code, state in States.items() if state["name"] not in ["Hawaii", "Alaska"]
    }

#Define function for summarising data
def summarise(df, group_by):
    #Group data
    grouped = df.groupby(by = group_by)
    #Summarise data as Series then convert back to Dataframe
    cases_sum = pd.DataFrame(grouped["cases"].sum()).reset_index()
    cases_avg = pd.DataFrame(grouped["cases"].mean()).reset_index()
    avg_incidence_year = pd.DataFrame(grouped["incidence_per_capita"].mean()).reset_index()
    #Give columns sensible names
    avg_incidence_year = avg_incidence_year.rename(columns = {"incidence_per_capita": "avg_incidence_per_week"})
    cases_sum = cases_sum.rename(columns = {"cases": "total_cases_per_year"})
    cases_avg = cases_avg.rename(columns = {"cases": "avg_cases_per_week"})
    #Merge dataframes
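    # Assumed completion (the original snippet is truncated at this point):
    # merge the three summaries on the grouping column(s) and return the result.
    summary = cases_sum.merge(cases_avg, on = group_by)
    summary = summary.merge(avg_incidence_year, on = group_by)
    return summary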
Example #7
import numpy as np

from bokeh.models import ColumnDataSource, StaticLayoutProvider
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.airport_routes import airports, routes
from bokeh.sampledata.us_states import data as us_states

output_file("graphs.html")

airports.set_index("AirportID", inplace=True)
airports.index.rename("index", inplace=True)
routes.rename(columns={
    "SourceID": "start",
    "DestinationID": "end"
},
              inplace=True)

lats, lons = [], []
for k, v in us_states.items():
    lats.append(np.array(v['lats']))
    lons.append(np.array(v['lons']))

source = ColumnDataSource(data=dict(lats=lats, lons=lons))

graph_layout = dict(
    zip(airports.index.astype(str), zip(airports.Longitude,
                                        airports.Latitude)))
layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

fig = figure(x_range=(-180, -60),
             y_range=(15, 75),
             x_axis_label="Longitude",
             y_axis_label="Latitude",
             plot_width=800,
Example #8
import time

import numpy as np

from bokeh.io import curdoc
from bokeh.models import HoverTool, HBox, VBox, Slider, Toggle
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.sampledata.us_states import data as states
from bokeh.palettes import Purples9

states = {
    code: state
    for code, state in states.items() if code not in ['HI', 'AK']
}


def gen_initial_rate(y):
    return min(np.random.choice([15, 40]) + np.random.uniform(-10, 10), 100)


state_xs = [state['lons'] for state in states.values()]
state_ys = [state['lats'] for state in states.values()]
colors = Purples9[::-1]

names = [state['name'] for state in states.values()]
initial_rates = [gen_initial_rate(1) for _ in states.values()]
state_colors = [colors[int(rate / 20)] for rate in initial_rates]

source = ColumnDataSource(data=dict(
    x=state_xs, y=state_ys, color=state_colors, name=names,
    rate=initial_rates))
Example #9
import time

import numpy as np

from bokeh.io import curdoc
from bokeh.models import HoverTool, HBox, VBox, Slider, Toggle
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.sampledata.us_states import data as states
from bokeh.palettes import Purples9

states = {
    code: state for code, state in states.items() if
    code not in ['HI', 'AK']
}

def gen_initial_rate(y):
    return min(
        np.random.choice([15, 40]) + np.random.uniform(-10, 10),
        100
    )

state_xs = [state['lons'] for state in states.values()]
state_ys = [state['lats'] for state in states.values()]
colors = Purples9[::-1]

names = [state['name'] for state in states.values()]
initial_rates = [gen_initial_rate(1) for _ in states.values()]
state_colors = [colors[int(rate / 20)] for rate in initial_rates]

source = ColumnDataSource(data=dict(
    x=state_xs,
Example #10
from bokeh.models import ColumnDataSource, StaticLayoutProvider
from bokeh.models.graphs import NodesAndLinkedEdges
from bokeh.palettes import Set3_12
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.us_states import data as us_states
from bokeh.sampledata.airport_routes import airports, routes

import numpy as np

output_file("graphs.html")

airports.set_index("AirportID", inplace=True)
airports.index.rename("index", inplace=True)
routes.rename(columns={"SourceID": "start", "DestinationID": "end"}, inplace=True)

lats, lons = [], []
for k, v in us_states.items():
    lats.append(np.array(v['lats']))
    lons.append(np.array(v['lons']))

source = ColumnDataSource(data=dict(lats=lats, lons=lons))

graph_layout = dict(zip(airports.index.astype(str), zip(airports.Longitude, airports.Latitude)))
layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

fig = figure(x_range=(-180, -60), y_range=(15,75),
              x_axis_label="Longitude", y_axis_label="Latitude",
              plot_width=800, plot_height=600, background_fill_color=Set3_12[4],
              background_fill_alpha=0.2, tools='box_zoom,reset')

fig.patches(xs="lons", ys="lats", line_color='grey', line_width=1.0,
             fill_color=Set3_12[10], source=source)
Example #11
df.describe()

# now it is the time to do the mapping

state_df = df.groupby(df['state'])['deaths_and_injuries'].sum()
print(state_df.head(n=5))
colors = bokeh.palettes.OrRd5[::-1]
color_mapper = bokeh.models.mappers.LinearColorMapper(palette=colors)
state_dict = state_df.to_dict()

# state-level information
new_state_xs = []
new_state_ys = []
state_name = []
state_count = []
for abbr, state in statesData.items():
    new_state_xs.append(state['lons'])
    new_state_ys.append(state['lats'])
    state_name.append(state['name'])
    state_count.append(state_dict.get(state['name'], 0))

incident_data_source = bokeh.models.sources.ColumnDataSource(df)
state_data_source = bokeh.models.sources.ColumnDataSource(
    data=dict(x=new_state_xs, y=new_state_ys, color=state_count))

plot = figure(title="School shooting", plot_width=800, plot_height=500)
plot.patches('x',
             'y',
             source=state_data_source,
             color={
                 'field': 'color',
Example #12
    def make_dataset(self):

        per_capita = self.per_capita.active == 1
        data_type = self.data_getter.labels[self.data_getter.active].lower()
        date = self.date.value

        data = np.empty(len(US_STATES))

        if data_type in ("cases", "deaths"):

            if not per_capita:
                dt_label = data_type
                label = f"Total New {data_type.title()}"
            else:
                dt_label = f"{data_type}_pc"
                label = f"New {data_type.title()} per 100,000"

            subset = GH_STATES_DATA.loc[GH_STATES_DATA["date"] == date, :]
            for i, (abbrv, state) in enumerate(US_STATES.items()):
                state_name = state["name"]
                value = subset.loc[subset["state"] == state_name,
                                   f"avg_{dt_label}"]
                if not value.empty and not np.isnan(value.values[0]):
                    data[i] = max(0, value.values[0])
                else:
                    data[i] = 0

            maxval = GH_STATES_DATA.loc[:, f"avg_{dt_label}"].max()

        elif data_type == "positivity":

            label = "Positivity (%)"

            subset = TRACKING_DATA.loc[TRACKING_DATA["datetime"] == date,
                                       ("state", "positivity")]
            for i, (abbrv, state) in enumerate(US_STATES.items()):
                value = subset.loc[subset["state"] == abbrv.upper(),
                                   "positivity"]
                if not value.empty and not np.isnan(value.values[0]):
                    data[i] = max(0, value.values[0])
                else:
                    data[i] = 0

            maxval = TRACKING_DATA.loc[:, "positivity"].max()

        interp = (
            compute_log_palette  # if logarithmic else compute_linear_palette
        )

        color_data = {
            "color":
            [interp(PALETTE, maxval / 256, maxval, val) for val in data],
            "value": data,
            "state": [state["name"] for state in US_STATES.values()],
            "lons": [],
            "lats": [],
        }

        for state in US_STATES.values():
            color_data["lons"].append(state["lons"])
            color_data["lats"].append(state["lats"])

        return label, maxval, ColumnDataSource(color_data)
Example #13

import collections

from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.palettes import RdBu11, RdBu10, BrBG11, Blues9
from bokeh.plotting import figure, curdoc
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Select, Paragraph, Slider, Div
from bokeh.sampledata.us_states import data as states
from bokeh.themes import built_in_themes
import numpy as np
import pandas as pd

## Load and process data
states = collections.OrderedDict(sorted(states.items()))
states.pop('DC')
states.pop('AK')
states.pop('HI')

dfs = dict()
for year in range(2013, 2018):
    tmp_df = pd.read_csv(f'data/{year}_processed.csv',
                         usecols=['State', 'Year', 'Median AQI', 'prcp', 'tmax'])
    tmp_df = tmp_df[~tmp_df['State'].isin(['AK', 'HI'])]
    tmp_df.index = list(range(48))
    dfs[year] = tmp_df

df = pd.concat(dfs, names=['year'])

data_2018 = {}