def multiline():
    """Build a vertically-stacked set of white/black population-ratio charts
    for the county currently stored in the session, returning the combined
    Altair chart as a JSON spec string.

    Reads ``current_state`` / ``current_county`` from the session, loads and
    preprocesses county data, then stacks 1-3 normalized bar charts depending
    on whether jail and/or prison data exist for the county.
    """
    county_data = read_county_from_db(session.get('current_state'), session.get('current_county'))
    source = helper_functions.process_data(county_data)
    # Create a column for the label
    source['value_label'] = source['value'].apply(
        lambda x: helper_functions.to_percentage(x))
    # Create a selection that chooses the nearest point & selects based on x-value
    # NOTE(review): `nearest` is defined but never attached to any chart below —
    # presumably leftover from an earlier version; confirm before removing.
    nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['year'], empty='none')
    # Display labels for the six demographic series plotted below.
    demographics = [
        'Total white population (15-64)', 'Total black population (15-64)',
        'White jail population', 'Black jail population',
        'White prison population', 'Black prison population'
    ]
    # Define color pairs matched to above demographics
    hex_colors = ['#cccec1', '#272727', '#cccec1', '#272727', '#cccec1', '#272727']
    # Combine demographic and colors into a dictionary
    demographic_labels = dict(zip(demographics, hex_colors))
    # Create pairs of variables to be used in the stacked charts
    wb_general = ['perc_white_total_pop', 'perc_black_total_pop']
    wb_jail = ['perc_white_jail_pop', 'perc_black_jail_pop']
    wb_prison = ['perc_white_prison_pop', 'perc_black_prison_pop']
    # General population chart.
    # NOTE(review): bare `Axis` (not `alt.Axis`) — presumably imported via
    # `from altair import Axis` at the top of the file; confirm.
    total_wb_population = alt.Chart(
        source[source['variable'].isin(wb_general)],
        height=150, width=500
    ).mark_bar().encode(
        x=alt.X("year:O", axis=Axis(title='Year')),
        y=alt.Y("value:Q", stack="normalize", axis=Axis(title='Ratio')),
        color=alt.Color('demographic:N', legend=None,
                        scale=alt.Scale(domain=list(demographic_labels.keys()),
                                        range=list(demographic_labels.values())))
    ).properties(
        title='Ratio of white/black residents in total county population (15-64)')
    if session.get('jail_data_exists'):
        # White/black jail population chart
        total_wb_jail = alt.Chart(
            source[source['variable'].isin(wb_jail)],
            height=150, width=500
        ).mark_bar().encode(
            x=alt.X("year:O", axis=Axis(title='Year')),
            y=alt.Y("value:Q", stack="normalize", axis=Axis(title='Ratio')),
            color=alt.Color('demographic:N', legend=None,
                            scale=alt.Scale(domain=list(demographic_labels.keys()),
                                            range=list(demographic_labels.values())))
        ).properties(
            title='Ratio of white/black inmates in jail population')
    if session.get('prison_data_exists'):
        # White/black prison population chart
        total_wb_prison = alt.Chart(
            source[source['variable'].isin(wb_prison)],
            height=150, width=500
        ).mark_bar().encode(
            x=alt.X("year:O", axis=Axis(title='Year')),
            y=alt.Y("value:Q", stack="normalize", axis=Axis(title='Ratio')),
            color=alt.Color('demographic:N', legend=None,
                            scale=alt.Scale(domain=list(demographic_labels.keys()),
                                            range=list(demographic_labels.values())))
        ).properties(
            title='Ratio of white/black inmates in prison population')
    # Concatenate charts depending on what data is available
    if session.get('prison_data_exists') and session.get('jail_data_exists'):
        chart = alt.vconcat(total_wb_population, total_wb_jail, total_wb_prison)
    elif session.get('prison_data_exists') and not session.get('jail_data_exists'):
        chart = alt.vconcat(total_wb_population, total_wb_prison)
    elif not session.get('prison_data_exists') and session.get('jail_data_exists'):
        chart = alt.vconcat(total_wb_population, total_wb_jail)
    else:
        chart = total_wb_population
    return chart.to_json()
""" World Projections ----------------- This example shows a map of the countries of the world using four available geographic projections. For more details on the projections available in Altair, see https://vega.github.io/vega-lite/docs/projection.html """ # category: maps import altair as alt from vega_datasets import data source = alt.topo_feature(data.world_110m.url, 'countries') base = alt.Chart(source).mark_geoshape( fill='#666666', stroke='white' ).properties( width=300, height=180 ) projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic'] charts = [base.project(proj).properties(title=proj) for proj in projections] alt.vconcat( alt.hconcat(*charts[:2]), alt.hconcat(*charts[2:]) )
def make_plots(tidy_ballots_df, benchmarks_df, colors):
    '''create two linked output plots

    Parameters
    ----------
    tidy_ballots_df : pandas DataFrame
        DataFrame of the form produced by tidy_ballots()
    benchmarks_df : pandas DataFrame
        DataFrame of the form produced by `calculate_benchmarks()`
    colors : dict
        Dictionary of the form `'player name': 'hex_color'`

    Returns
    -------
    altair Chart object
        An altair chart consisting of a horizontal bar plot and a linked
        line plot
    '''
    color_scale = alt.Color('player:N', legend=None, scale=colors)
    # Multi-selection on player: clicking a bar filters the line plot below.
    click = alt.selection_multi(fields=['player'])
    # brush = alt.selection_interval(encodings=['x'])

    # Top panel: total votes per player, sorted descending.
    # NOTE(review): the x domain upper bound 412 is hard-coded — presumably the
    # total ballot count for this election; confirm it is updated per dataset.
    top = alt.Chart().mark_bar().encode(
        x = alt.X('sum(votes):Q',
                  scale=alt.Scale(domain=(0, 412)),
                  axis=alt.Axis(title='total votes')),
        y = alt.Y('player:N',
                  sort=alt.EncodingSortField(field='votes', op='sum', order='descending'),
                  axis=alt.Axis(title=None)),
        tooltip = alt.Tooltip('sum(votes):Q', title='votes'),
        color=alt.condition(click, color_scale, alt.value('lightgray'))
    ).properties(
        width=600,
        height=350
    ).add_selection(
        click
    )

    # Bottom panel: cumulative votes over time for the clicked player(s).
    # NOTE(review): tooltip title is the literal string 'null' — likely meant
    # to suppress the title; confirm intent.
    bottom = alt.Chart().mark_line(point=True).encode(
        x = alt.X('yearmonthdate(date):T', axis=alt.Axis(title='date')),
        y = alt.Y('cumulative_votes:Q', axis = alt.Axis(title='cumulative votes')),
        color = alt.Color('player:N', legend=None, scale=colors),
        tooltip = alt.Tooltip('player:N', title='null')
    ).properties(
        width=600,
        height=300
    ).transform_filter(
        click
    ).interactive()

    # Dashed reference line (75% threshold series) over the cumulative plot.
    line75_df = get_cum_ballots_by_date(tidy_ballots_df)
    line75 = alt.Chart(line75_df).mark_line(point=True, strokeDash=[4,4]).encode(
        x = alt.X('yearmonthdate(date):T'),
        y = alt.Y('line75:Q'),
        color = alt.value('gray'),
        tooltip = alt.Tooltip("line75:Q")
    )

    # Induction-pace benchmark rules: vertical on the bar plot, horizontal on
    # the line plot (same field, different encoding channel).
    current_pace_bars = alt.Chart(benchmarks_df).mark_rule(color='orangered').encode(
        x='induction_pace:Q',
        tooltip=alt.Tooltip('induction_pace:Q')
    )
    current_pace_lines = alt.Chart(benchmarks_df).mark_rule(color='orangered').encode(
        y='induction_pace:Q',
        tooltip=alt.Tooltip('induction_pace:Q')
    )

    # Layer the benchmarks onto each panel and stack them vertically; the
    # shared data is supplied once at the vconcat level.
    return alt.vconcat(
        (top + current_pace_bars),
        (bottom + current_pace_lines + line75),
        data=tidy_ballots_df)
# Sweep 16 weight-init standard deviations (1e0 down to 1e-4, log-spaced),
# crossed with batch-norm on/off.
# NOTE(review): `trainer`, `data` and `product` come from earlier in the file
# (not visible here); `product` is presumably a project helper, not
# itertools.product, since the result has .fit()/.to_frame().
stds = np.logspace(0, -4, num=16)
prod = product(trainer, stds, [True, False])  # type:ignore
prod = prod.fit(data, epoch_data=data[:])
prod  # notebook cell output: show the sweep object

# -
# Peek at the first three results of the lazy sweep iterator.
it = iter(prod)
print(next(it))
print(next(it))
print(next(it))

# Run the training and return the results as a DataFrame.
df = prod.to_frame(columns=["std", "bn"])
df.tail()

# Visualize the results.
import altair as alt  # isort:skip


def plot(std, df):
    # One small accuracy-vs-epoch panel per std value, colored by batch-norm.
    y = alt.Y("accuracy", scale=alt.Scale(domain=[0, 1]))
    return (alt.Chart(df).mark_line().encode(
        x="epoch", y=y,
        color="bn").properties(width=80, height=80, title=f"std={std:.05f}"))


# groupby yields (std, group) pairs; reversed so the largest std comes first.
charts = [plot(*x) for x in df.groupby("std")][::-1]
# 4x4 grid of the 16 panels.
alt.vconcat(*(alt.hconcat(*charts[k:k + 4]) for k in range(0, 16, 4)))
import altair as alt
from vega_datasets import data

# URL of the movies dataset, defined once so every chart references the same
# source. (Fix: the heatmap previously re-read data.movies.url directly,
# bypassing this variable — same value, now consistent.)
source = data.movies.url

# Single-point selection keyed on the binned IMDB-rating (x) value.
pts = alt.selection(type="single", encodings=['x'])

# 2-D binned heatmap of IMDB vs Rotten Tomatoes ratings, shaded by count.
rect = alt.Chart(source).mark_rect().encode(
    alt.X('IMDB_Rating:Q', bin=True),
    alt.Y('Rotten_Tomatoes_Rating:Q', bin=True),
    alt.Color('count()',
              scale=alt.Scale(scheme='greenblue'),
              legend=alt.Legend(title='Total Records')))

# Grey point overlay sized by how many records in each bin match the selection.
circ = rect.mark_point().encode(
    alt.ColorValue('grey'),
    alt.Size(
        'count()',
        legend=alt.Legend(title='Records in Selection'))).transform_filter(pts)

# Genre bar chart that drives the selection; the selected genre is steelblue.
bar = alt.Chart(source).mark_bar().encode(
    x='Major_Genre:N',
    y='count()',
    color=alt.condition(pts, alt.ColorValue("steelblue"),
                        alt.ColorValue("grey"))).properties(selection=pts,
                                                            width=550,
                                                            height=200)

# Stack the layered heatmap above the genre bars; keep their legends separate.
alt.vconcat(rect + circ, bar).resolve_legend(color="independent",
                                             size="independent")
def facet_wrap(subplts, plots_per_row):
    """Lay out a flat list of Altair charts as a grid with
    ``plots_per_row`` columns.

    Bug fix: the original computed ``len(subplts) // plots_per_row`` full
    rows, silently discarding any leftover charts when the count was not an
    exact multiple of ``plots_per_row``; the step-slice below keeps the final
    partial row as well.
    """
    # Chunk the list into consecutive rows of at most plots_per_row charts.
    rows = [subplts[i:i + plots_per_row]
            for i in range(0, len(subplts), plots_per_row)]
    rows = [alt.hconcat(*charts) for charts in rows]
    # Stack the rows; strip grid lines, the chart frame, and x-label rotation.
    return alt.vconcat(*rows).configure_axis(grid=False).configure_view(
        strokeOpacity=0).configure_axisBottom(labelAngle=0)
# brush # ).properties(width=500,height=500,title="Number of People Going to Bars and Restaurants") # # st.write(behavioral) # vCombine = alt.vconcat(emotionalBase,behavioralBase) #vertical concat two combined charts # st.write(vCombine) #---version 2 combine #st.write(commWorryChart|selfWorryChart|barDataChart|restaurantDataChart) #horizontal concat #---version 3 combine #st.write(commWorryChart+selfWorryChart) #layer #---version 4 combine #vertical concat all 4 vconcatChart=alt.vconcat(commWorryChart,selfWorryChart,barDataChart,restaurantDataChart).add_selection( selectedCounty, brush ) st.write(vconcatChart) #---Show Raw Data data table #BarData if st.checkbox("Show me the raw data for bar visits"): st.write(barDatadf) #RestaurantData if st.checkbox("Show me the raw data for restaurant visits"): st.write(restaurantDatadf) #CommunityWorry Data if st.checkbox("Show me the raw for those data worry about illness in community"): st.write(commWorrydf)
# Scatter of normalized Rotten Tomatoes vs IMDB ratings; points outside the
# `brush` selection (defined earlier in the file) fade to low opacity.
ratings = alt.Chart(sh_movies_df).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating:Q',
          title='Rotten Tomatoes Rating (Normalized)',
          scale=alt.Scale(domain=[0, 1])),
    alt.Y('IMDB_Rating:Q',
          title='IMDB Rating (Normalized)',
          scale=alt.Scale(domain=[0, 1])),
    alt.Tooltip(
        ['Title:N', 'Rotten_Tomatoes_Rating:Q', 'IMDB_Rating:Q', 'Year:O']),
    opacity=alt.condition(brush, alt.value(0.75), alt.value(0.05))).properties(
        width=650,
        height=400,
        title='Movie Ratings (Normalized) in IMDB and Rotten Tomatoes')

# Budget vs worldwide gross on log-log axes, linked to the same brush.
gross = alt.Chart(sh_movies_df).mark_circle().encode(
    alt.X('Production_Budget:Q',
          scale=alt.Scale(type='log', base=10, zero=False),
          axis=alt.Axis(format="~s"),
          title='Production Budget ($) (log_10 scale)'),
    alt.Y('Worldwide_Gross:Q',
          scale=alt.Scale(type='log', base=10, zero=False),
          axis=alt.Axis(format="~s"),
          title='Worldwide Gross ($) (log_10 scale)'),
    alt.Tooltip(
        ['Title:N', 'Production_Budget:Q', 'Worldwide_Gross:Q', 'Year:O']),
    opacity=alt.condition(brush, alt.value(0.75), alt.value(0.05))).properties(
        width=650,
        height=400,
        title='World Gross and Production Budget')

# Stack beneath the `years` chart defined earlier in the file.
alt.vconcat(years, ratings, gross).properties(spacing=10)
width=250, height=250, ).add_selection(brush).add_selection(hover) text = alt.Chart(df).mark_text(dy=-5, align='right').encode( alt.Text('name', type='nominal'), x=alt.X("Principal component 1", axis=alt.Axis(ticks=False, labels=False, grid=False)), y=alt.Y("Principal component 2", axis=alt.Axis(ticks=False, labels=False, grid=False)), tooltip=[ 'accession', 'name', 'country', 'admixed group', 'continent', 'country code', 'CS number', 'latitude', 'longitude', 'collector', 'site', 'seq by' ], opacity=alt.condition( ~hover, alt.value(0), alt.value(1))).transform_filter(selection).transform_filter(hover) chart_PCA += text legend = alt.Chart().mark_rect().encode( y=alt.Y('country:N', axis=alt.Axis(orient='left', title="Countries")), color=color).add_selection(selection).transform_filter(brush) hcharts = alt.hconcat(chart_GTM, chart_tSNE, chart_PCA, data=df) chart = alt.hconcat(legend, chart, data=df) vcharts = alt.vconcat(hcharts, chart, data=df).properties() vcharts.save(argv[2] + '.html')
""" Histogram with Responsive Bins ------------------------------ This shows an example of a histogram with bins that are responsive to a selection domain. Click and drag on the bottom panel to see the bins change on the top panel. """ # category: histograms import altair as alt from vega_datasets import data source = data.flights_5k() brush = alt.selection_interval(encodings=['x']) base = alt.Chart(source).transform_calculate( time="hours(datum.date) + minutes(datum.date) / 60").mark_bar().encode( y='count():Q').properties(width=600, height=100) alt.vconcat( base.encode( alt.X('time:Q', bin=alt.Bin(maxbins=30, extent=brush), scale=alt.Scale(domain=brush))), base.encode(alt.X('time:Q', bin=alt.Bin(maxbins=30)), ).add_selection(brush))
# category: interactive
import altair as alt
from vega_datasets import data

# Interval brush over the x (date) axis, shared by both panels.
brush = alt.selection_interval(encodings=['x'])

# Fixed color mapping from weather category to hex color.
scale = alt.Scale(
    domain=['sun', 'fog', 'drizzle', 'rain', 'snow'],
    range=['#e7ba52', '#c7c7c7', '#aec7e8', '#1f77b4', '#9467bd'])

# Points inside the brush keep their weather color; the rest turn light gray.
weather_or_gray = alt.condition(
    brush, 'weather:N', alt.value('lightgray'), scale=scale)

# Top panel: daily max temperature by date, point size by precipitation.
points = alt.Chart().mark_point().encode(
    alt.X('date:T', timeUnit='monthdate', axis=alt.Axis(title='Date')),
    alt.Y('temp_max:Q', axis=alt.Axis(title='Maximum Daily Temperature (C)')),
    color=weather_or_gray,
    size=alt.Size('precipitation:Q', scale=alt.Scale(range=[5, 200]))
).properties(width=600, height=400, selection=brush)

# Bottom panel: record counts per weather type within the brushed interval.
bars = alt.Chart().mark_bar().encode(
    x='count(*):Q',
    y='weather:N',
    color=alt.Color('weather:N', scale=scale),
).transform_filter(brush.ref()).properties(width=600)

# Shared data is supplied once at the vconcat level.
chart = alt.vconcat(points, bars, data=data.seattle_weather.url)
def one_variable(self, df, variable, axis_title, color_scheme):
    """Build a day-of-week heatmap for one variable, with marginal bar charts.

    Filters ``df`` to rows where ``variable`` matches and value > 0, then
    lays out a datum-day x bulletin-day heatmap (log2 color scale) with a
    sum-by-row bar chart on the right and a sum-by-column bar chart on top.
    Returns the composed (vconcat of top + hconcat) Altair chart.
    """
    # Shared base: filtered data plus the log-scaled color encoding that all
    # three marks inherit.
    base = alt.Chart(df).transform_filter(
        alt.datum.variable == variable).transform_filter(
            alt.datum.value > 0).encode(color=alt.Color(
                'sum(value):Q',
                title=None,
                scale=alt.Scale(type='log', base=2, scheme=color_scheme)))
    # Central heatmap: day of datum date vs day of bulletin date.
    heatmap = base.mark_rect().encode(
        x=alt.X('day(datum_date):O', title=axis_title),
        y=alt.Y('day(bulletin_date):O', title='Día boletín'),
        tooltip=[
            'variable', 'day(bulletin_date):O', 'day(datum_date):O',
            alt.Tooltip(field='value', type='quantitative', aggregate='sum')
        ])
    # Right margin: totals per bulletin day (axes hidden to abut the heatmap).
    right = base.mark_bar().encode(
        x=alt.X('sum(value):Q', title=None, axis=None),
        y=alt.Y('day(bulletin_date):O', title=None, axis=None),
        tooltip=[
            'variable', 'day(bulletin_date):O',
            alt.Tooltip(field='value', type='quantitative', aggregate='sum')
        ])
    # Top margin: totals per datum day.
    top = base.mark_bar().encode(
        x=alt.X('day(datum_date):O', title=None, axis=None),
        y=alt.Y('sum(value):Q', title=None, axis=None),
        tooltip=[
            'variable', 'day(datum_date):O',
            alt.Tooltip(field='value', type='quantitative', aggregate='sum')
        ])
    heatmap_size = 160
    histogram_size = 40
    return alt.vconcat(
        top.properties(
            width=heatmap_size,
            height=histogram_size,
            # This title should logically belong to the whole chart,
            # but assigning it to the concat chart anchors it wrong.
            # See: https://altair-viz.github.io/user_guide/generated/core/altair.TitleParams.html
            title=alt.TitleParams(text=variable,
                                  anchor='middle',
                                  align='center',
                                  fontSize=14,
                                  fontWeight='normal')),
        alt.hconcat(heatmap.properties(width=heatmap_size, height=heatmap_size),
                    right.properties(width=histogram_size, height=heatmap_size),
                    spacing=3),
        spacing=3)
def int_vega():
    """Build a linked pair of Altair charts (records vs injuries per month,
    colored by year) for violation data after 2015, with an optional
    damage-level sizing toggled by a Streamlit button.

    Returns the vconcat chart; Streamlit widgets are rendered as a side
    effect.

    NOTE(review): the two branches below duplicate each other almost entirely
    (they differ only in y-domain, point size, width, and the final title) —
    consider factoring into a helper in a follow-up change.
    """
    rl_vio = doc(0)
    rl_vio["YEAR"] = rl_vio["YEAR"].astype("int")
    # Keep only 2016 onward; source2 aliases source (same frame, no copy).
    source = rl_vio[rl_vio["YEAR"] > 2015]
    source2 = source
    # One fixed color per year, 2016-2021.
    scale = alt.Scale(domain=[2016, 2017, 2018, 2019, 2020, 2021],
                      range=["#e7ba52", "#c7c7c7", "#aec7e8", "#659CCA", "#1f77b4", "#9467bd"])
    color = alt.Color('YEAR:O', scale=scale)
    # Click on the lower chart filters the upper; brush on the upper filters
    # the lower (cross-filtering).
    click = alt.selection_multi(encodings=['color'])
    brush = alt.selection_interval()
    st.text("How about damage level? Click 'Show Damage Level'!")
    if st.button("Show Damage Level"):
        # Branch 1: size points by the DAMAGE category.
        points = alt.Chart(source).mark_point().encode(
            alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS):Q", "sum(INJURIES):Q"]),
            alt.X('MONTH:O',
                  title='Month',
                  axis=alt.Axis(offset=10, labelAngle=0, ticks=True,
                                minExtent=30, grid=False)),
            alt.Y('sum(RECORDS):Q',
                  scale=alt.Scale(domain=[0, 6000]),
                  axis=alt.Axis(offset=10, ticks=True, minExtent=30, grid=False,),
                  title="Records"),
            color=alt.condition(brush, color, alt.value('darkgray')),
            size="DAMAGE:N"
        ).properties(
            width=650,
            height=400,
        ).add_selection(
            brush
        ).transform_filter(
            click
        )
        lines = alt.Chart(source2).mark_circle().encode(
            alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS):Q", "sum(INJURIES):Q"]),
            alt.X('MONTH:O',
                  title="Month",
                  axis=alt.Axis(offset=10, ticks=True, labelAngle=0,
                                minExtent=30, grid=False)),
            alt.Y("sum(INJURIES):Q",
                  title="Injuries",
                  axis=alt.Axis(offset=10, ticks=True, minExtent=30, grid=False,)),
            color=alt.condition(brush, color, alt.value('red')),
            size="DAMAGE:N"
        ).transform_filter(
            brush
        ).properties(
            width=650,
        ).add_selection(
            click
        )
        # NOTE(review): title differs from the other branch ("Injured" vs
        # "Injuries") — confirm which is intended.
        vega = alt.vconcat(points, lines, title="Cases VS Injured")
    else:
        # Branch 2: fixed point size, wider charts, larger y-domain.
        points = alt.Chart(source).mark_point().encode(
            alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS):Q", "sum(INJURIES):Q"]),
            alt.X('MONTH:O',
                  title='Month',
                  axis=alt.Axis(offset=10, labelAngle=0, ticks=True,
                                minExtent=30, grid=False)),
            alt.Y('sum(RECORDS):Q',
                  scale=alt.Scale(domain=[0, 12000]),
                  axis=alt.Axis(offset=10, ticks=True, minExtent=30, grid=False,),
                  title="Records"),
            color=alt.condition(brush, color, alt.value('darkgray')),
            size=alt.value(80)
        ).properties(
            width=750,
            height=400,
        ).add_selection(
            brush
        ).transform_filter(
            click
        )
        lines = alt.Chart(source2).mark_circle().encode(
            alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS):Q", "sum(INJURIES):Q"]),
            alt.X('MONTH:O',
                  title="Month",
                  axis=alt.Axis(offset=10, ticks=True, labelAngle=0,
                                minExtent=30, grid=False)),
            alt.Y("sum(INJURIES):Q",
                  title="Injuries",
                  axis=alt.Axis(offset=10, ticks=True, minExtent=30, grid=False,)),
            color=alt.condition(brush, color, alt.value('red')),
            size= alt.value(80)
        ).transform_filter(
            brush
        ).properties(
            width=750,
        ).add_selection(
            click
        )
        vega = alt.vconcat(points, lines, title="Cases VS Injuries")
    return vega
""" Interval Selection Example ========================== This is an example of creating a stacked chart for which the domain of the top chart can be selected by interacting with the bottom chart. """ # category: area charts import altair as alt from vega_datasets import data source = data.sp500.url brush = alt.selection(type='interval', encodings=['x']) upper = alt.Chart().mark_area().encode( alt.X('date:T', scale={'domain': brush.ref()}), y='price:Q' ).properties( width=600, height=200 ) lower = upper.properties( height=60 ).add_selection(brush) alt.vconcat(upper, lower, data=source)
def bv_areaPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Bivariate area plot of two series against the frame's index.

    Parameters
    ----------
    data : pandas DataFrame
        Must contain columns 'plotY' and 'plotX1' (renamed to ylabel1 /
        ylabel2 below) and a meaningful index used as the x axis.
    engine : str
        'Static' -> matplotlib figure wrapped in a panel Matplotlib pane;
        'Interactive' -> Altair chart with an interval brush and per-series
        summary statistics. Any other value returns None implicitly.
    xlabel, ylabel1, ylabel2 : str
        Axis / series labels; ylabel1 and ylabel2 also become column names.
    """
    data = data.copy()
    data.rename(columns={'plotY':ylabel1, 'plotX1':ylabel2}, inplace=True)
    if engine == 'Static':
        # Two filled series on twinned y axes.
        fig, axes = plt.subplots(figsize=(9,6))
        _index = data.index.tolist()
        axes.fill_between(_index, data[ylabel1].values)
        axes.legend([ylabel1], loc=0)
        axes_r = axes.twinx()
        axes_r.fill_between(_index, data[ylabel2].values, color='orange')
        axes_r.legend([ylabel2], loc=0)
        axes.set_xlabel(xlabel, fontsize = 15)
        axes.set_ylabel(ylabel1, fontsize = 15)
        axes_r.set_ylabel(ylabel2, fontsize = 15)
        axes.grid(b=True, which='major', color='k', linewidth=0.25)
        # Close so the figure is not double-rendered outside the pane.
        plt.close()
        return pn.pane.Matplotlib(fig, tight=True)
    elif engine == 'Interactive':
        data=data.dropna()
        # Selection Brush
        brush = alt.selection(type='interval', encodings=['x'], name='isel')
        # Base Plot
        base = alt.Chart(data.reset_index())
        base = base.encode(x = alt.X('{0}:T'.format(data.index.name), title=''),
                           tooltip = ylabel1)
        base = base.properties(width = 580, height = 275)
        # Upper Plot: series 1, blue gradient area, x-domain follows the brush.
        upper1 = base.mark_area(line={'color':'#3d84ba'},
                                color=alt.Gradient(
                                    gradient='linear',
                                    stops=[alt.GradientStop(color='white', offset=0),
                                           alt.GradientStop(color='#3d84ba', offset=1)],
                                    x1=1, x2=1, y1=1, y2=0))
        upper1 = upper1.encode(x = alt.X('{0}:T'.format(data.index.name),
                                         scale=alt.Scale(domain=brush), title=''),
                               y = alt.Y('{0}:Q'.format(ylabel1),
                                         scale=alt.Scale(zero=False),
                                         axis=alt.Axis(format='~s')))
        # Series 2, orange gradient area.
        upper2 = base.mark_area(line={'color':'#f57542'},
                                color=alt.Gradient(
                                    gradient='linear',
                                    stops=[alt.GradientStop(color='white', offset=0),
                                           alt.GradientStop(color='#f57542', offset=1)],
                                    x1=1, x2=1, y1=1, y2=0))
        upper2 = upper2.encode(x = alt.X('{0}:T'.format(data.index.name),
                                         scale=alt.Scale(domain=brush), title=''),
                               y = alt.Y('{0}:Q'.format(ylabel2),
                                         scale=alt.Scale(zero=False),
                                         axis=alt.Axis(format='~s')))
        # Lower Plot: miniature overview strip that carries the brush.
        lower = base.mark_area(line={'color':'darkgray'},
                               color=alt.Gradient(
                                   gradient='linear',
                                   stops=[alt.GradientStop(color='white', offset=0),
                                          alt.GradientStop(color='darkgray', offset=1)],
                                   x1=1, x2=1, y1=1, y2=0))
        lower = lower.encode(y=alt.Y('{0}:Q'.format(ylabel1), title='', axis=None))
        lower = lower.properties(height=20)
        lower = lower.add_selection(brush)
        lower.encoding.x.title = 'Interval Selection'
        # Base Statistics1: mean/stddev/variance of series 1 over the brushed
        # window, rendered as a text label inside the upper chart.
        base_stat1 = upper1.transform_filter(brush)
        base_stat1 = base_stat1.transform_aggregate(Mean1='mean({0})'.format(ylabel1),
                                                    StdDev1='stdev({0})'.format(ylabel1),
                                                    Var1='variance({0})'.format(ylabel1))
        label_stat1 = base_stat1.transform_calculate(stat_label1="'Mean = ' + format(datum.Mean1, '~s') + '; Standard Deviation = ' + format(datum.StdDev1, '~s') + '; Variance = ' + format(datum.Var1, '~s')")
        label_stat1 = label_stat1.mark_text(align='left', baseline='bottom', color='#3d84ba')
        label_stat1 = label_stat1.encode(x=alt.value(0.0), y=alt.value(12.0),
                                         text=alt.Text('stat_label1:N'))
        # Base Statistics2: same for series 2.
        # NOTE(review): the calculated field is again named `stat_label1`
        # (not stat_label2) — consistent within this chart so it works, but
        # the name is misleading; confirm before renaming.
        base_stat2 = upper2.transform_filter(brush)
        base_stat2 = base_stat2.transform_aggregate(Mean2='mean({0})'.format(ylabel2),
                                                    StdDev2='stdev({0})'.format(ylabel2),
                                                    Var2='variance({0})'.format(ylabel2))
        label_stat2 = base_stat2.transform_calculate(stat_label1="'Mean = ' + format(datum.Mean2, '~s') + '; Standard Deviation = ' + format(datum.StdDev2, '~s') + '; Variance = ' + format(datum.Var2, '~s')")
        label_stat2 = label_stat2.mark_text(align='left', baseline='bottom', color='#f57542')
        label_stat2 = label_stat2.encode(x=alt.value(0.0), y=alt.value(25.0),
                                         text=alt.Text('stat_label1:N'))
        upper1 = upper1 + label_stat1
        upper2 = upper2 + label_stat2
        # Overlay both series with independent y scales.
        upper = (upper1+upper2).resolve_scale(y='independent')
        ## Y LABEL 1
        # Values: whole-series reference stats and upper bounds ("_uu") used
        # to fix the domains of the stat bars below.
        _ymean_uu1 = data[ylabel1].max()
        _ymean1 = data[ylabel1].mean()
        # Inspired from :- https://stats.stackexchange.com/a/350278
        _maxvar_in_slice1 = ((data[ylabel1].max()-data[ylabel1].min())/2)**2
        _ystd_uu1 = np.sqrt(_maxvar_in_slice1)
        _ystd1 = data[ylabel1].std()
        _yvar_uu1 = _maxvar_in_slice1
        _yvar1 = data[ylabel1].var()
        # Stat Bar Base
        stats_barbase1 = base_stat1.mark_bar(color='#3d84ba')
        stats_barbase1 = stats_barbase1.properties(width = 188, height = 20)
        # Mean Bar: brushed-window mean, with a red rule at the full-series mean.
        mean_bar1 = stats_barbase1.encode(x=alt.X('Mean1:Q', title='',
                                                  scale=alt.Scale(domain=[-_ymean_uu1,_ymean_uu1]),
                                                  axis=alt.Axis(format='~s')),
                                          y=alt.value(10.5))
        totmean_line1 = alt.Chart(pd.DataFrame({'x': [_ymean1]}))
        totmean_line1 = totmean_line1.mark_rule(color='red', size=5)
        totmean_line1 = totmean_line1.encode(x='x')
        mean_bar1 += totmean_line1
        # Standard Deviation Bar
        std_bar1 = stats_barbase1.encode(x=alt.X('StdDev1:Q', title='',
                                                 scale=alt.Scale(domain=[-_ystd_uu1,_ystd_uu1]),
                                                 axis=alt.Axis(format='~s')),
                                         y=alt.value(10.5))
        totstd_line1 = alt.Chart(pd.DataFrame({'x': [_ystd1]}))
        totstd_line1 = totstd_line1.mark_rule(color='red', size=5)
        totstd_line1 = totstd_line1.encode(x='x')
        std_bar1 += totstd_line1
        # Variance Bar
        var_bar1 = stats_barbase1.encode(x=alt.X('Var1:Q', title='',
                                                 scale=alt.Scale(domain=[-_yvar_uu1,_yvar_uu1]),
                                                 axis=alt.Axis(format='~s')),
                                         y=alt.value(10.5))
        totvar_line1 = alt.Chart(pd.DataFrame({'x': [_yvar1]}))
        totvar_line1 = totvar_line1.mark_rule(color='red', size=5)
        totvar_line1 = totvar_line1.encode(x='x')
        var_bar1 += totvar_line1
        ## Y LABEL 2
        # Values (same construction as series 1)
        _ymean_uu2 = data[ylabel2].max()
        _ymean2 = data[ylabel2].mean()
        # Inspired from :- https://stats.stackexchange.com/a/350278
        _maxvar_in_slice2 = ((data[ylabel2].max()-data[ylabel2].min())/2)**2
        _ystd_uu2 = np.sqrt(_maxvar_in_slice2)
        _ystd2 = data[ylabel2].std()
        _yvar_uu2 = _maxvar_in_slice2
        _yvar2 = data[ylabel2].var()
        # Stat Bar Base
        stats_barbase2 = base_stat2.mark_bar(color='#f57542')
        stats_barbase2 = stats_barbase2.properties(width = 188, height = 20)
        # Mean Bar
        mean_bar2 = stats_barbase2.encode(x=alt.X('Mean2:Q', title='Mean',
                                                  scale=alt.Scale(domain=[-_ymean_uu2,_ymean_uu2]),
                                                  axis=alt.Axis(format='~s')),
                                          y=alt.value(10.5))
        totmean_line2 = alt.Chart(pd.DataFrame({'x': [_ymean2]}))
        totmean_line2 = totmean_line2.mark_rule(color='red', size=5)
        totmean_line2 = totmean_line2.encode(x='x')
        mean_bar2 += totmean_line2
        # Standard Deviation Bar
        std_bar2 = stats_barbase2.encode(x=alt.X('StdDev2:Q', title='Std',
                                                 scale=alt.Scale(domain=[-_ystd_uu2,_ystd_uu2]),
                                                 axis=alt.Axis(format='~s')),
                                         y=alt.value(10.5))
        totstd_line2 = alt.Chart(pd.DataFrame({'x': [_ystd2]}))
        totstd_line2 = totstd_line2.mark_rule(color='red', size=5)
        totstd_line2 = totstd_line2.encode(x='x')
        std_bar2 += totstd_line2
        # Variance Bar
        var_bar2 = stats_barbase2.encode(x=alt.X('Var2:Q', title='Var',
                                                 scale=alt.Scale(domain=[-_yvar_uu2,_yvar_uu2]),
                                                 axis=alt.Axis(format='~s')),
                                         y=alt.value(10.5))
        totvar_line2 = alt.Chart(pd.DataFrame({'x': [_yvar2]}))
        totvar_line2 = totvar_line2.mark_rule(color='red', size=5)
        totvar_line2 = totvar_line2.encode(x='x')
        var_bar2 += totvar_line2
        # Concatenated
        # p = alt.vconcat(upper+label_stat, mean_bar|std_bar|var_bar, lower).configure_concat(spacing=2)
        p = alt.vconcat(upper, mean_bar1|std_bar1|var_bar1,
                        mean_bar2|std_bar2|var_bar2, lower).configure_concat(spacing=2)
        p = p.configure_axisLeft(labelColor = '#3d84ba', titleColor = '#3d84ba')
        p = p.configure_axisRight(labelColor = '#f57542', titleColor = '#f57542')
        return p
def main():
    """Streamlit dashboard: download OWID COVID data, aggregate it weekly,
    and render stringency/case charts plus socioeconomic scatter grids."""
    # Extract data
    data = pd.read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")
    data["date"] = pd.to_datetime(data.date)
    # Snap each date to the Monday of its week, stored as an ISO date string.
    data["week"] = data["date"].apply(lambda x: x - pd.Timedelta(days=x.weekday()))
    data["week"] = data["week"].dt.date.apply(lambda x: str(x))
    # Aggregate data
    # Weekly aggregation
    weekly_data = (
        data.groupby(["location", "week"])
        .agg(
            {
                "new_cases": "sum",
                "stringency_index": "max",
                "total_deaths": "max",
                "population": "max",
                "total_tests": "max",
            }
        )
        .reset_index()
        .dropna()
    )
    # Latest snapshot: keep only each location's most recent week.
    max_week = weekly_data.groupby("location").agg({"week": "max"}).reset_index()
    latest_data = weekly_data.merge(max_week)
    latest_data["death_per_population_pct"] = (
        latest_data["total_deaths"] * 100 / latest_data["population"]
    )
    latest_data["tests_per_population"] = (
        latest_data["total_tests"] / latest_data["population"]
    )
    list_of_countries = latest_data["location"]
    # Country dimension table: one row of static attributes per location.
    country_dimension = (
        data.groupby(["location"])
        .agg(
            {
                "median_age": "max",
                "aged_65_older": "max",
                "aged_70_older": "max",
                "gdp_per_capita": "max",
                "cardiovasc_death_rate": "max",
                "diabetes_prevalence": "max",
                "female_smokers": "max",
                "male_smokers": "max",
                "handwashing_facilities": "max",
                "life_expectancy": "max",
                "human_development_index": "max",
            }
        )
        .reset_index()
    )
    country_dimension = country_dimension.merge(
        latest_data[["location", "death_per_population_pct", "tests_per_population"]],
        how="left",
    )
    # Plot dashboards
    # NOTE(review): several user-facing strings below contain typos ("THis",
    # "visualizzation", "inteact", "gdc") — runtime text, fix separately.
    st.title("Covid visualizer")
    st.markdown(
        "THis is an example eda of covid data solely using python scripts."
        "There are several interesting attributes to consider."
    )
    st.header("Stringency Index and Cases")
    st.subheader(
        "This is a composite measure based on nine response indicators including school closures, workplace closures, "
        "and travel bans, rescaled to a value from 0 to 100, 100 being strictest"
    )
    country_filter = st.selectbox("Select country", list_of_countries, index=0)
    st.markdown("There is some more explanation which I like to give")
    # Weekly new cases (line) and stringency (bars) for the selected country.
    weekly_new_cases = (
        alt.Chart(weekly_data[weekly_data.location == country_filter])
        .mark_line()
        .encode(x="week:T", y="sum(new_cases)")
        .properties(width=1000, height=500)
    )
    weekly_stringency = (
        alt.Chart(weekly_data[weekly_data.location == country_filter])
        .mark_bar(color="#CFD8DC", opacity=0.5)
        .encode(x="week:T", y="mean(stringency_index)")
        .properties(width=1000, height=500)
    )
    cases_stringency_corr = (
        alt.Chart(weekly_data[weekly_data.location == country_filter])
        .mark_point()
        .encode(y="new_cases", x="stringency_index")
        .properties(width=300, height=300)
    )
    # Overlay cases and stringency with independent y scales.
    final = alt.layer(weekly_stringency, weekly_new_cases).resolve_scale(
        y="independent"
    )
    col1, col2 = st.beta_columns((3, 1))
    col1.write(final)
    col2.markdown(
        "THis is an example where I write additional data with many more analysis. Showing India as an example for visualizzation. There will be"
        "more and more text to explain how to inteact with this chart and pick a country of choice."
        "Add a lot more text to make the point clear and also how to take the data with a grain of salt."
    )
    col2.write(cases_stringency_corr)
    # Plot Section 2: death rate vs socioeconomic indicators, 3x2 grid.
    st.header("Death % against various socioeconomic variables")
    st.subheader("Scatterplot grid show several values")
    grid = alt.vconcat(
        alt.hconcat(
            plot_scatter_with_regression("median_age", source=country_dimension),
            plot_scatter_with_regression("gdp_per_capita", source=country_dimension),
        ),
        alt.hconcat(
            plot_scatter_with_regression(
                "cardiovasc_death_rate", source=country_dimension
            ),
            plot_scatter_with_regression(
                "diabetes_prevalence", source=country_dimension
            ),
        ),
        alt.hconcat(
            plot_scatter_with_regression(
                "handwashing_facilities", source=country_dimension
            ),
            plot_scatter_with_regression(
                "human_development_index", source=country_dimension
            ),
        ),
    )
    col1, col2 = st.beta_columns((3, 1))
    col1.write(grid)
    col2.markdown("Here is where the explanation goes by each chart")
    # Plot section 3: top-60 locations by tests per capita, colored by GDP.
    st.header("Testing and gdc per capita")
    st.subheader("Scatterplot grid show several values")
    test_by_gdp = country_dimension[
        ["location", "gdp_per_capita", "tests_per_population"]
    ].dropna()
    test_by_gdp = test_by_gdp.sort_values('tests_per_population', ascending=False).iloc[:60, ]
    tests_chart = (
        alt.Chart(test_by_gdp)
        .mark_bar()
        .encode(
            x=alt.X("location", sort="-y"),
            y="tests_per_population",
            color="gdp_per_capita",
        )
    ).properties(height=500)
    st.write(tests_chart)
legend=alt.Legend(title='Total Records') ) ) circ = rect.mark_point().encode( alt.ColorValue('grey'), alt.Size('count()', legend=alt.Legend(title='Records in Selection') ) ).transform_filter( pts ) bar = alt.Chart(source).mark_bar().encode( x='Major_Genre:N', y='count()', color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey")) ).properties( selection=pts, width=550, height=200 ) alt.vconcat( rect + circ, bar ).resolve_legend( color="independent", size="independent" )
def get_group_chart(grid_df, min_value: float, max_value: float, title: str = ''): rcharts = list() for gv in group_values: bar_df = grid_df[grid_df[group_var] == gv] rcharts.append( get_bar_chart(bar_df, min_value, max_value)) return alt.hconcat(*rcharts, title=alt.TitleParams(title, anchor='middle', align='center', orient='top')) chart = alt.vconcat() min_unit = 0.05 for r, rv in enumerate(row_values): rcharts = list() row_df = to_inspect[to_inspect[row_var] == rv] min_value = math.floor(row_df['value'].min() / min_unit) * min_unit # type: ignore max_value = math.ceil(row_df['value'].max() / min_unit) * min_unit # type: ignore for cv in col_values: grid_df = to_inspect[(to_inspect[row_var] == rv) & (to_inspect[col_var] == cv)] # Only add title at the top row. title = f'{col_var} = {cv}' if r == 0 else '' rcharts.append(
def make_plot(df):
    """
    Generates plots on tab 2 of the dashboard.

    Input - Data frame to be plotted
    Returns - Stock trend plot, brush-slider overview, and monthly change chart
    (the first two vconcat'ed, then '&' with the faceted bars).
    """
    def mds_special():
        """
        Function for default MDS configuration for labels, titles
        etc. to be applied to altair plots
        """
        font = "Arial"
        axisColor = "#000000"
        gridColor = "#DEDDDD"
        # Vega-Lite theme dict consumed by alt.themes.register below.
        return {
            "config": {
                "title": {
                    "fontSize": 24,
                    "font": font,
                    "anchor": "middle",  # equivalent of left-aligned.
                    "fontColor": "#000000"
                },
                'view': {
                    "height": 300,
                    "width": 400
                },
                "axisX": {
                    "domain": True,
                    #"domainColor": axisColor,
                    "gridColor": gridColor,
                    "domainWidth": 1,
                    "grid": False,
                    "labelFont": font,
                    "labelFontSize": 12,
                    "labelAngle": 0,
                    "tickColor": axisColor,
                    "tickSize": 5,  # default, including it just to show you can change it
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding": 10,  # guessing, not specified in styleguide
                    "title": "X Axis Title (units)",
                },
                "axisY": {
                    "domain": False,
                    "grid": True,
                    "gridColor": gridColor,
                    "gridWidth": 1,
                    "labelFont": font,
                    "labelFontSize": 14,
                    "labelAngle": 0,
                    #"ticks": False, # even if you don't have a "domain" you need to turn these off.
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding": 10,  # guessing, not specified in styleguide
                    "title": "Y Axis Title (units)",
                    # titles are by default vertical left of axis so we need to hack this
                    #"titleAngle": 0, # horizontal
                    #"titleY": -10, # move it up
                    #"titleX": 18, # move it to the right so it aligns with the labels
                },
            }
        }

    # register the custom theme under a chosen name
    alt.themes.register('mds_special', mds_special)
    # enable the newly registered theme
    alt.themes.enable('mds_special')
    #alt.themes.enable('none') # to return to default

    # Create a plot from the cars dataset
    # Hover highlights one company's line; brush (on the overview strip)
    # zooms the x domain of the main chart.
    highlight = alt.selection(type='single',
                              on='mouseover',
                              nearest=True,
                              fields=['company'])
    brush = alt.selection(type='interval', encodings=['x'])

    # stock history chart
    chart = alt.Chart(df).mark_line().encode(
        alt.X('date', title='Date', scale=alt.Scale(domain=brush)),
        alt.Y('price', title='Stock price (USD)'),
        color=alt.Color('company', title="Company"),
        size=alt.condition(~highlight, alt.value(3), alt.value(5))).add_selection(highlight).properties(
            title='Historical Stock Prices',
            width=900,
            height=350)

    # Monthly % change bars, faceted per company, filtered to the
    # hover-highlighted company.
    bars = alt.Chart(df).mark_bar().encode(
        y=alt.Y('monthly_return',
                title='Monthly Change %',
                axis=alt.Axis(format='%')),
        x=alt.X('date', title='Month', scale=alt.Scale(domain=brush)),
        color=alt.condition(
            alt.datum.monthly_return > 0,
            alt.value("steelblue"),  # The positive color
            alt.value("orange"))).properties(
                width=470,
                title='Monthly price change (%)').transform_filter(
                    highlight).facet(facet='company',
                                     title="Monthly stock price % changes",
                                     columns=2)

    # monthly change chart (overview strip that carries the brush)
    lower = alt.Chart(df).mark_line().encode(
        alt.X('date', title=' ', scale=alt.Scale(domain=brush)),
        alt.Y('price', title=' ', axis=None),
        color=alt.Color('company', title="Company"),
        size=alt.condition(~highlight, alt.value(3), alt.value(5))
    ).add_selection(highlight).properties(
        title='Feel free to drag across a time period below to zoom in the chart!',
        height=60,
        width=900).add_selection(brush)

    return alt.vconcat(chart, lower) & bars
def main(_):
    """Run the full rater-agreement / PPCA / t-SNE analysis pipeline.

    Reads every CSV under FLAGS.data, drops neutral and unrated annotations,
    computes leave-one-rater-out (partial) correlations, runs PPCA with a
    varimax rotation, and writes plots (PDF/HTML) plus a CSV summary into
    FLAGS.plot_dir.

    Args:
        _: unused positional argument (absl.app.run passes argv here).
    """
    print("Loading data...")
    # Concatenate every CSV file in the data directory into a single frame.
    dfs = []
    for filename in os.listdir(FLAGS.data):
        if filename.endswith(".csv"):
            dfs.append(
                pd.read_csv(os.path.join(FLAGS.data, filename),
                            encoding="utf-8"))
    data = pd.concat(dfs)
    print("%d Examples" % (len(set(data["id"]))))
    print("%d Annotations" % len(data))
    # NOTE(review): os.makedirs without exist_ok=True raises FileExistsError
    # if FLAGS.plot_dir already exists — reruns need a fresh directory.
    os.makedirs(FLAGS.plot_dir)
    with open(FLAGS.emotion_file, "r") as f:
        all_emotions = f.read().splitlines()
    all_emotions_neutral = all_emotions + ["neutral"]
    emotion2idx = {e: i for i, e in enumerate(all_emotions)}
    print("%d emotion Categories" % len(all_emotions))
    print("Processing data...")
    # Remove neutral labels
    data = data[data["neutral"] == 0]
    # Remove examples with no ratings (difficult examples)
    data = data[data[all_emotions_neutral].sum(axis=1) != 0]
    # Convert into num_examples x num_raters x num_ratings format;
    # keep only examples that have at least 3 raters.
    data = data.groupby("id").filter(lambda x: len(x) >= 3)
    id_groups = data.groupby("id")
    worker2examples = {}  # dict mapping worker ids to (example, rater id) tuples
    max_num_raters = data.groupby("id").size().max()
    ratings = np.zeros(
        (len(id_groups), max_num_raters, len(all_emotions)))  # ignore "neutral"
    rater_msk = np.zeros(
        (len(id_groups), max_num_raters))  # for masking out non-existent raters
    print("Ratings shape", ratings.shape)
    # Get ratings and rater mask
    texts = []
    for ex_idx, (_, g) in enumerate(id_groups):
        texts.append(g.iloc[0]["text"])
        rater_count = 0
        # iterate through workers
        for _, row in g.iterrows():
            for e in all_emotions:
                ratings[ex_idx, rater_count, emotion2idx[e]] = row[e]
            rater_msk[ex_idx, rater_count] = 1
            worker_id = row["rater_id"]
            if worker_id in worker2examples:
                worker2examples[worker_id].append((ex_idx, rater_count))
            else:
                worker2examples[worker_id] = [(ex_idx, rater_count)]
            rater_count += 1
    print("Calculating leave-out (partial) correlations...")
    partial_corr_per_rater = []
    corr_per_rater = []
    for worker_id in worker2examples:
        partial_corrs, corrs = LeaveOut(ratings, rater_msk, worker2examples,
                                        worker_id)
        # Skip raters that did not yield a correlation for every emotion.
        if len(partial_corrs) < len(all_emotions):
            continue
        partial_corr_per_rater.append(partial_corrs)
        corr_per_rater.append(corrs)
    corr_per_rater = np.array(corr_per_rater)
    partial_corr_per_rater = np.array(partial_corr_per_rater)
    # Verify that there are no NaN values
    assert np.isnan(corr_per_rater).sum() == 0
    # Apply Wilcoxon signed rank test to test significance of each dimension
    p_vals = np.apply_along_axis(wilcoxon, 0, partial_corr_per_rater)[1]
    # Apply Bonferroni correction
    reject, corr_pvals, _, newalpha = multipletests(
        p_vals, alpha=0.05, method="bonferroni")
    print("Which dimensions to keep?")
    print(reject)
    print(corr_pvals)
    print(newalpha)
    print("Running PPCA on all the data...")
    # Take all raters and split them randomly
    x = []
    y = []
    rater_counts = rater_msk.sum(axis=1).astype(int)
    all_ratings_avg = []
    for i, ex in enumerate(ratings):
        # Get actual raters based on mask
        keep = []
        for worker_rating in ex[:rater_counts[i]]:
            keep.append(list(worker_rating))
        all_ratings_avg.append(list(np.array(keep).mean(axis=0)))
        # Shuffle raters randomly
        random.shuffle(keep)
        num_raters = len(keep)
        x.append(list(np.array(keep[:int(num_raters / 2)]).mean(axis=0)))
        y.append(list(np.array(keep[int(num_raters / 2):]).mean(axis=0)))
    x = np.array(x)
    y = np.array(y)
    all_ratings_avg = np.array(all_ratings_avg)
    w, v = PPCA(x, y)  # final components (p-values determine which ones to keep)
    print("Plotting percentage of covariance explained...")
    PlotCovar(v)
    # Apply varimax rotation
    w_vari = Varimax(w)
    # Get mapping between ppcs and emotions
    map_df = pd.DataFrame(
        w_vari, index=all_emotions,
        columns=np.arange(len(all_emotions))).round(4)
    # Sort to move values to diagonal
    # NOTE(review): pd.Series.nonzero was removed in pandas 1.0; this line
    # requires an older pandas (or a port to np.nonzero) — confirm the pin.
    map_df = map_df[list(
        np.argsort(map_df.apply(lambda x: pd.Series.nonzero(x)[0]).values)[0])]
    f = plt.figure(figsize=(10, 6), dpi=300)
    sns.heatmap(
        map_df,
        center=0,
        cmap=sns.diverging_palette(240, 10, n=50),
        yticklabels=all_emotions)
    plt.xlabel("Component")
    plt.savefig(
        FLAGS.plot_dir + "/component_loadings.pdf",
        dpi=600,
        format="pdf",
        bbox_inches="tight")
    ppc2emotion = map_df.abs().idxmax().to_dict()
    emotion2ppc = {e: i for i, e in ppc2emotion.items()}
    print(ppc2emotion)
    print("Plotting frequency and mean left-out rater correlations...")
    corr_mean = corr_per_rater.mean(axis=0)
    corr_mean_ordered = [corr_mean[emotion2ppc[e]] for e in all_emotions]
    df_plot = pd.DataFrame({
        "emotion": all_emotions,
        "agreement": corr_mean_ordered
    })
    df_plot["count"] = df_plot["emotion"].map(
        data[all_emotions].sum(axis=0).to_dict())
    df_plot.sort_values("count", ascending=False, inplace=True)
    df_plot.to_csv(FLAGS.plot_dir + "/emotion_agreements.csv", index=False)
    # Get colors
    norm = plt.Normalize(df_plot["agreement"].min(),
                         df_plot["agreement"].max())
    sm = plt.cm.ScalarMappable(cmap="BuPu", norm=norm)
    sm.set_array([])
    # Generate figure
    fig = plt.figure(dpi=600, figsize=(5, 6))
    ax = sns.barplot(
        data=df_plot,
        y="emotion",
        x="count",
        orient="h",
        hue="agreement",
        palette="BuPu",
        dodge=False,
        edgecolor="black",
        linewidth=1)
    ax.get_legend().remove()
    ax.figure.colorbar(sm)
    plt.text(18000, 31, "Interrater\nCorrelation", ha="center")
    plt.xlabel("Number of Examples")
    plt.ylabel("")
    plt.draw()
    labels = [item.get_text() for item in ax.get_xticklabels()]
    ax.set_xticklabels(["%dk" % (int(int(label) / 1000)) for label in labels])
    plt.tight_layout()
    fig.savefig(
        FLAGS.plot_dir + "/label_distr_agreement.pdf",
        dpi=600,
        format="pdf",
        bbox_inches="tight")
    print("Generating t-SNE plot...")
    # Get PPC scores for all examples
    all_ratings_avg = Demean(all_ratings_avg)  # demean all ratings
    ppc_scores = all_ratings_avg.dot(w_vari)  # project onto ppcs
    ppc_scores_abs = np.absolute(ppc_scores)
    # Load maximally distinct colors
    colors = pd.read_csv(
        FLAGS.rgb_colors, sep="\t", header=None, names=np.arange(3))
    # Set colors (todo(ddemszky): add names to colors in file)
    palette_rgb = colors.values
    with open(FLAGS.emotion_color_order) as f:
        color_order = f.read().splitlines()
    ppc2color = {emotion2ppc[e]: i for i, e in enumerate(color_order)}
    # get rgb value for each example based on weighted average of top emotions
    rgb_vals = []
    hex_vals = []
    top_categories = []
    threshold = 0.5  # exclude points not loading on any of the top 10 categories
    counter = 0
    rgb_max = 255
    other_color = palette_rgb[len(all_emotions), :]
    for i, scores in enumerate(ppc_scores_abs):
        # Keep at most the two strongest components above the threshold.
        top_ppcs = [
            idx for idx in (-scores).argsort()[:2] if scores[idx] > threshold
        ]
        top_emotions = ",".join([ppc2emotion[idx] for idx in top_ppcs
                                ]) if top_ppcs else "other"
        top_categories.append(top_emotions)
        if len(top_ppcs) < 1:  # doesn't have top emotions from list
            color = other_color  # use grey
            counter += 1
        else:
            # Weighted average of top emotions (square->weighted average->square root)
            color_ids = [ppc2color[idx] for idx in top_ppcs]
            weights = [scores[idx] for idx in top_ppcs]
            # Need to round, otherwise floating point precision issues will result
            # in values slightly above 1
            avg = np.round(
                np.sqrt(
                    np.average(
                        np.power(palette_rgb[color_ids] * rgb_max, 2),
                        axis=0,
                        weights=weights)) / rgb_max, 4)
            if (avg > 1).sum() > 0:
                print(avg)
            color = avg
        rgb_vals.append(list(color))
        hex_vals.append("#%02x%02x%02x" %
                        tuple(np.array(color * rgb_max, dtype=int)))
    rgb_vals = np.array(rgb_vals)
    # Create t-SNE model
    tsne_model = TSNE(
        perplexity=30,
        n_components=2,
        n_iter=1000,
        random_state=23,
        learning_rate=500,
        init="pca")
    new_values = tsne_model.fit_transform(ppc_scores)
    x = []
    y = []
    for value in new_values:
        x.append(value[0])
        y.append(value[1])
    # Put data in dataframe
    df = pd.DataFrame({
        "x": x,
        "y": y,
        "color": hex_vals,
        "label(s)": top_categories,
        "text": texts
    })
    df = df[df["label(s)"] != "other"]
    df["top_label"] = df["label(s)"].str.split(",").str[0]
    # Two selections:
    # - a brush that is active on the top panel
    # - a multi-click that is active on the bottom panel
    brush = alt.selection(type="interval")
    click = alt.selection_multi(encodings=["color"])
    # NOTE(review): DataFrame.sample(5000) raises ValueError when fewer than
    # 5000 rows remain after filtering — confirm the data is always larger.
    sample = df.sample(5000)  # max 5000 examples can be plotted
    points = alt.Chart(sample).mark_point(
        filled=True,
        size=50).encode(
            x="x:Q",
            y="y:Q",
            color=alt.Color("color", scale=None),
            tooltip=["label(s)", "text"]).properties(
                width=700, height=600).add_selection(brush)
    # Bottom panel is a bar chart
    bars = alt.Chart(sample).mark_bar().encode(
        x="count()",
        y="top_label:N",
        color=alt.condition(click, alt.Color("color:N", scale=None),
                            alt.value("lightgray")),
    ).transform_filter(brush.ref()).properties(
        width=700, selection=click)
    chart = alt.vconcat(
        points, bars, data=sample, title="t-SNE Projection of Examples")
    chart.save(FLAGS.plot_dir + "/tsne.html", format="html")
def macd_chart(source, width=900, height=600):
    """
    Build a combined candlestick + MACD chart.

    Calculates the MACD (DIF) and OSC. The below logic applies and should be
    used to signal:

    1. When DIF and DEA are positive, the MACD line passes (exceeds) the OSC
       line going upwards, and the divergence is positive, there is a buy
       signal confirmation.
    2. When DIF and DEA are negative, the MACD line exceeds the OSC line
       going downwards, and the divergence is negative, there is a sell
       signal confirmation.

    Args:
        source (pandas.DataFrame): Stock data with 'Date', 'MACD', 'DEA',
            'OSC' and EMA columns.
        width (int): Chart width in pixels.
        height (int): Total chart height in pixels.

    Returns:
        (altair.vegalite.v4.api.Chart): Altair chart object.
    """
    # Common axis zoom selector for both charts.
    zoom = alt.selection_interval(bind='scales', encodings=['x'])
    # Bar coloration: green when MACD is above DEA, red otherwise.
    # (The Vega expression compares MACD against DEA, not close vs open.)
    open_close_color = alt.condition("datum.MACD > datum.DEA",
                                     alt.value("#06982d"),
                                     alt.value("#ae1325"))
    # Get MACD and signal line in correct (long) form: Date / VALUE / Label.
    df_list = []
    for d in ['MACD', 'DEA']:
        df_temp = source[['Date', d]].copy()
        df_temp = df_temp.rename(columns={d: 'VALUE'})
        df_temp['Label'] = d
        df_list.append(df_temp)
    df1 = pd.concat(df_list)
    macd_lines = alt.Chart(df1)\
        .properties()\
        .mark_line()\
        .encode(
            x='Date:T',
            y='VALUE',
            color='Label',
            opacity=alt.value(0.8))
    # Colored bars for MACD.
    bar = alt.Chart(source).mark_bar()\
        .properties(width=width, height=int(height*0.3))\
        .encode(
            x='Date:T',
            y='OSC',
            color=open_close_color)\
        .add_selection(zoom)
    # Candlestick chart.
    candle = candlestick_chart(source, width=width, height=int(0.7 * height))
    # Add EMA_12 and EMA_26 to the Candlestick chart (same long-form reshape).
    df_list = []
    for d in ['EMA_12', 'EMA_26']:
        df_temp = source[['Date', d]].copy()
        df_temp = df_temp.rename(columns={d: 'VALUE'})
        df_temp['Label'] = d
        df_list.append(df_temp)
    df2 = pd.concat(df_list)
    ema_lines = alt.Chart(df2)\
        .properties()\
        .mark_line()\
        .encode(
            x='Date:T',
            y='VALUE',
            color='Label')\
        .add_selection(zoom)
    # Candlestick (with EMAs) on top, MACD/OSC panel below.
    return alt.vconcat(candle + ema_lines, bar + macd_lines)
def interactive_plot(self,
                     element='Cu',
                     display=True,
                     save=False,
                     filename='altair_dashboard.html',
                     base_dir=None):
    """Build an interactive Altair dashboard of element concentrations.

    Scatter plots of `element` against every other element are linked
    through a shared `element_group` selection, together with a legend,
    a selected-point counter, and a summary bar chart.

    Args:
        element (str): column used as the x-axis of every scatter plot.
        display (bool): when True, return the assembled chart.
        save (bool): when True, write the chart to ``base_dir/filename``.
        filename (str): output HTML file name.
        base_dir (str): directory for the saved file; required when saving.

    Returns:
        The assembled Altair chart when ``display`` is True, else None.

    Raises:
        ValueError: if ``save`` is True and ``base_dir`` is None.
    """
    # Only select pixels that have Cu values
    cu_points = self.data[self.data[element].notnull()]
    alt.data_transformers.disable_max_rows()
    cu_points['num_present_elements'] = cu_points['element_group'].apply(
        lambda x: len(x.split('|')))
    # unpivot the table into a new table with ['pixel', 'element', 'concentration']
    cu_points_melted = cu_points.melt(id_vars='element_group',
                                      value_vars=self.elements,
                                      var_name='element',
                                      value_name='concentration')
    selection = alt.selection_multi(fields=['element_group'])
    # base bar chart (shared by the bar marks and their text labels below)
    bar_base = alt.Chart(cu_points_melted).transform_filter(
        selection).encode(y='element',
                          x='sum(concentration)',
                          text=alt.Text('sum(concentration)', format='0.2f'))
    # function to create scatter plot based on element
    # NOTE(review): `legend=None` is passed to alt.condition rather than to
    # the Color encoding — confirm it has the intended effect.
    scatter_base = lambda e: alt.Chart(cu_points).mark_circle().encode(
        x=element,
        y=e,
        tooltip=self.elements,
        color=alt.condition(selection, 'element_group:N',
                            alt.value('lightgray'),
                            legend=None)).add_selection(selection)
    # Floating text showing how many points the selection currently covers.
    counts = alt.Chart(cu_points).transform_filter(
        selection).transform_aggregate(
            count='count()').transform_calculate(
                text="number of points selected: " +
                alt.datum.count).mark_text(dy=-20,
                                           baseline="top",
                                           align="left").encode(
                                               x=alt.value(100),
                                               y=alt.value(5),
                                               text='text:N',
                                           )
    # legend plot
    legend = alt.Chart(cu_points).mark_circle(size=100).encode(
        x='element_group:N',
        color=alt.condition(
            selection, 'element_group:N',
            alt.value('lightgray'))).add_selection(selection)
    # bar plots
    bar_final = bar_base.mark_bar()
    bar_text = bar_base.mark_text(align='left', baseline='middle', dx=3)
    # scatter plots, laid out three per row
    # NOTE(review): the exclusion compares against lowercase 'cu' while the
    # default element is 'Cu' — verify the casing of self.elements.
    scatters = alt.vconcat(*[
        alt.hconcat(*[(scatter_base(e).properties(width=300, height=150) +
                       counts) for e in self.elements[i:i + 3] if e != 'cu'])
        for i in range(0, len(self.elements) - 1, 3)
    ])
    # final plot, put together with altair's fancy syntax
    final_plot = alt.vconcat(scatters,
                             legend | (bar_final + bar_text),
                             center=True)
    if save:
        if base_dir is None:
            raise ValueError('base_dir cannot be None if saving')
        final_plot.save(os.path.join(base_dir, filename))
    if display:
        return final_plot
=================== This chart shows an example of using an interval selection to filter the contents of an attached histogram, allowing the user to see the proportion of items in each category within the selection. """ # category: interactive charts import altair as alt from vega_datasets import data cars = data.cars.url brush = alt.selection(type='interval') points = alt.Chart().mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.condition(brush, 'Origin:N', alt.value('lightgray')) ).properties( selection=brush ) bars = alt.Chart().mark_bar().encode( y='Origin:N', color='Origin:N', x='count(Origin):Q' ).transform_filter( brush.ref() ) alt.vconcat(points, bars, data=cars)
legend=alt.Legend(title="Clade")) #Encoding Size size_encode = alt.Size('value:Q', legend=alt.Legend(title="% Prevalence")) # ==== CHART CREATION ==== main_chart = alt.Chart(cd_data).mark_circle().encode(x=x_encode, y=y_encode, color=color_encode, size=size_encode) sum_chart = alt.Chart(sum_data).mark_bar().encode( x='Species:O', y=alt.Y('Total Disease:Q', title='% with disease'), color=color_encode, ).properties(height=100) # combine the two charts into one image full_chart = alt.vconcat(sum_chart, main_chart).configure(background="white") # save graph full_chart.save('../figures/summarydata.png') #open image of chart img = Image.open('../figures/summarydata.png') img.show()
def describe_cat_var(dataframe, cat_vars, n_cols=3):
    """
    This function will take dataframe and categorical variable names and
    will plot the histogram of each categorical variable

    Parameters
    -----------
    dataframe: `pandas.DataFrame`
        The dataframe whose EDA analysis is to be performed
    cat_vars: `list`
        A list containing names of categorical variables
    n_cols: `int`, optional
        A number indicating how many plots should be displayed in a row

    Returns
    --------
    `altair`
        a grid of altair plot containing all histograms

    Raises
    -------
    Exception
        If any argument has an invalid type or value, or a requested
        column is missing from the dataframe.

    Examples
    ---------
    >>> X = pandas.DataFrame({
    ...     'type': ['Car', 'Bus', 'Car'],
    ...     'height': [10, 20, 30]})
    >>> cat_vars = ['type']
    >>> describe_cat_var(X, cat_vars)
    """
    # Checking for valid inputs
    if not isinstance(dataframe, pd.DataFrame):
        raise Exception("The value of the argument 'dataframe' must be " +
                        "of type 'pandas.DataFrame'")
    if not isinstance(cat_vars, list) or \
            not all(isinstance(x, str) for x in cat_vars):
        raise Exception("The value of the argument 'cat_vars' must be " +
                        "a list of strings")
    if not isinstance(n_cols, int) or n_cols <= 0:
        raise Exception("The value of the argument 'n_cols' must be " +
                        "a positive non zero integer")
    if not set(cat_vars).issubset(set(dataframe.columns)):
        raise Exception("The input categorical column names must belong to " +
                        "the dataframe")
    dataframe = dataframe.dropna()
    # BUG FIX: index with the list, not a set — set indexers are rejected by
    # modern pandas and do not preserve the caller's column order.
    data = dataframe[cat_vars]
    n = len(cat_vars)
    n_rows = int(np.ceil(n / n_cols))
    z = 0
    # Plotting the histograms in loop: build each row with hconcat,
    # then stack the rows with vconcat.
    for i in range(n_rows):
        for j in range(n_cols):
            if z < n:
                cols = cat_vars[z]
            else:
                break
            hist = alt.Chart(data).mark_bar(width=40).encode(
                x=alt.X(cols + ':O'),
                y='count()').properties(height=200,
                                        width=300,
                                        title='Histogram of ' + cat_vars[z])
            z = z + 1
            if j == 0:
                row_plot = hist
            else:
                row_plot = alt.hconcat(row_plot, hist)
        if i == 0:
            plot = row_plot
        else:
            plot = alt.vconcat(plot, row_plot)
    return plot
# Bar chart of new vaccinations this week per Postcode District.
# NOTE(review): `change` (DataFrame), `name` and `order` (title/sort
# configuration) are free variables defined outside this snippet.
# NOTE(review): the variable name `plt` shadows the conventional matplotlib
# alias; the '%-d' strftime directive is glibc-specific (fails on Windows).
plt = altair.vconcat(
    altair.Chart(change).mark_bar().encode(
        y=altair.Y(
            'Postcode District:N',
            title='Postcode District (highest %s at top, lowest at bottom)' %name,
            sort=altair.SortField(
                order,
                'descending'
            )
        ),
        x=altair.X('Change:Q', title='New vaccinations this week'),
        # Grey by default; blue/orange flag districts with a regional centre
        # or a mobile clinic.
        color=altair.Color(
            'Colour',
            scale=altair.Scale(
                range=['grey','blue','orange'],
                domain=['None','Regional Centre','Mobile Clinic'],
            ),
            legend=altair.Legend(title='')
        ),
    ).properties(
        height=1000,
        width=450,
        title='NI COVID-19 Vaccinations last week by Postcode District'
    )
).properties(
    # Attribution footer rendered below the chart.
    title=altair.TitleParams(
        ['Vaccinations data from HSCNI COVID-19 dashboard, mid-2018 populations from NISRA',
         'Mobile vaccination clinic locations for last week from nidirect',
         'https://twitter.com/ni_covid19_data on %s' %datetime.datetime.now().strftime('%A %-d %B %Y')],
        baseline='bottom',
        orient='bottom',
        anchor='end',
        fontWeight='normal',
        fontSize=10,
        dy=10
    ),
)
"count": uniques, "pct": uniques / df_len, }) else: head_sr = uniques.head(half_lim_values) tail_sr = uniques.tail(half_lim_values) remainder = uniques[half_lim_values:-half_lim_values] counts = np.concatenate((head_sr, [remainder.sum()], tail_sr)) # counts = pd.concat((head_sr, [remainder.sum()], tail_sr)) chart_df = pd.DataFrame({ "value": head_sr.index.to_list() + ["..[Others].."] + tail_sr.index.to_list(), "count": counts, "pct": counts / df_len, }) # display(chart_df.head()) bars = alt.Chart(chart_df, title=f"Field: {col}").mark_bar().encode( x=alt.X("pct:Q", axis=alt.Axis(format='%'), title="Percentage of records"), y=alt.Y("value:N", sort=None, title="Value"), ) text = bars.mark_text( align="left", baseline="middle", dx=3 # Nudges text to right so it doesn't appear on top of the bar ).encode(text="count:Q") charts.append(bars + text) chart = alt.vconcat( *charts, title=f"Most- and least-common values per categorical field (from {df_len} total records)", )
alt.Y('Miles_per_Gallon', type='quantitative'), alt.Color('Origin', type='nominal') ) base = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', ).properties( width=150, height=150 ) alt.vconcat( base.encode(color='Cylinders:Q').properties(title='quantitative'), base.encode(color='Cylinders:O').properties(title='ordinal'), base.encode(color='Cylinders:N').properties(title='nominal'), ) # --- pop = data.population.url base = alt.Chart(pop).mark_bar().encode( alt.Y('mean(people):Q', title='total population') ).properties( width=200, height=200 )
def showRatingDistribution(data, name=''):
    """Create an interactive visualization of WiFi-hotspot distributions.

    Args:
        data (DataFrame): the input data frame, which must contain at least
            the columns 'BORO', 'PROVIDER', 'LON' and 'LAT'.
        name (str): the name of the platform (optional) to be displayed
            in the chart title.

    Return:
        Chart: an Altair chart object that corresponds to the visualization
    """
    ## The color expression for highlighting the bar under mouse
    color_expression = "highlight._vgsid_==datum._vgsid_"
    # NOTE(review): ConditionalPredicateValueDef is a legacy/low-level Altair
    # schema class — confirm it still exists in the pinned Altair version.
    color_condition = alt.ConditionalPredicateValueDef(color_expression,
                                                      "SteelBlue")
    ## There are two types of selection in our chart:
    ## (1) A selection for highlighting a bar when the mouse is hovering over
    highlight_selection = alt.selection_single(name="highlight",
                                               empty="all",
                                               on="mouseover")
    ## (2) A selection for updating the rating distribution when the mouse is clicked
    ## Note the encodings=['y'] parameter is needed to specify that once a selection
    ## is triggered, it will propagate the encoding channel 'y' as a condition for
    ## any subsequent filter done on this selection. In short, it means use the data
    ## field associated with the 'y' axis as a potential filter condition.
    rating_selection = alt.selection_single(name="PROVIDER",
                                            empty="all",
                                            encodings=['y'])
    ## We need to compute the max count to scale our distribution appropriately
    maxCount_BORO = int(data['BORO'].value_counts().max())
    maxCount_SSID = int(data['PROVIDER'].value_counts().max())
    ## Our visualization consists of two bar charts placed side by side. The first one
    ## sorts the apps by their average ratings as below. Note the compound selection
    ## that is constructed by adding the two selections together.
    barMean = alt.Chart() \
        .mark_bar(stroke="Black") \
        .encode(
            alt.Y('BORO:O',
                  axis=alt.Axis(title="Location of Hotspot"),
                  sort=alt.SortField(field="BORO", op="count",
                                     order='descending'),
                  ),
            alt.X("count()",
                  axis=alt.Axis(title="Number of Hotspot"),
                  scale = alt.Scale(domain=(0,maxCount_BORO)),
                  ),
            alt.ColorValue("LightGrey", condition=color_condition),
        ).properties(
            selection = highlight_selection+rating_selection
        )
    ## The second one uses the selected app specified by the rating_selection
    ## to filter the data, and build a histogram based on the ratings. Note
    ## the use of rating_selection.ref() as a condition for transform_filter().
    ## The scale was explicitly constructed for the X axis to fill out the
    ## the potential empty values, e.g. no one gave an app a score of 3, but
    ## we still want to show 1, 2, 3, 4, and 5 in the axis (but not in with .5).
    barRating = alt.Chart() \
        .mark_bar(stroke="Black") \
        .encode(
            alt.X("PROVIDER:O",
                  axis=alt.Axis(title="PROVIDER"),
                  sort=alt.SortField(field="PROVIDER", op="count",
                                     order='descending'),
                  ),
            alt.Y("count()",
                  axis=alt.Axis(title="Number of Hotspot"),
                  scale=alt.Scale(domain=(0,maxCount_SSID)),
                  ),
            alt.ColorValue("LightGrey"),
        ).properties(
            selection = highlight_selection
        ).transform_filter(
            rating_selection.ref()
        )
    # GeoJSON outline of NYC zip codes used as the map background.
    states = "https://raw.githubusercontent.com/hvo/datasets/master/nyc_zip.geojson"
    # US states background
    background = alt.Chart(states).mark_geoshape(
        fill='lightgray',
        stroke='white').properties(title='Map',
                                   width=500,
                                   height=500).project('albersUsa')
    # Hotspot locations, filtered by the provider clicked in the bar chart.
    points = alt.Chart(data).mark_point(filled=True, size=200).encode(
        longitude='LON:Q',
        latitude='LAT:Q',
        color=alt.value('SteelBlue'),
        size=alt.value(30)).transform_filter(rating_selection.ref())
    ## We just need to concatenate the plots horizontally, and return the result.
    return alt.hconcat(alt.vconcat(
        barMean, barRating,
        data=data,
        title="{} Hotspot Distribution".format(name)),
                       (background + points),
                       data=data)
'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-09/firsts.csv' ) firsts.to_csv('/Users/vivekparashar/Downloads/firsts.csv') # Create/Convert a pandas dataframe to dplython df firsts = DplyFrame(firsts) firsts.columns firsts.gender.unique() firsts.category.unique() # firsts df summary by category t1 = (firsts >> mutate(year_grp=((X.year / 10).round()) * 10) >> group_by( X.year_grp, X.category) >> summarize(nrows=X.accomplishment.count())) c1 = alt.Chart(t1).mark_circle().encode(x='year_grp:O', y='category:O', size='nrows:Q') c3 = alt.Chart(t1).mark_bar().encode(x='year_grp', y='nrows', color='category') # firsts df summary by gender t2 = (firsts >> mutate(year_grp=((X.year / 10).round()) * 10) >> group_by( X.year_grp, X.gender) >> summarize(nrows=X.accomplishment.count())) c2 = alt.Chart(t2).mark_circle().encode(x='year_grp:O', y='gender:O', size='nrows:Q') chart = alt.vconcat(c2, c1, c3) chart.save( '/Users/vivekparashar/OneDrive/OneDrive-GitHub/Challenges-and-Competitions/TidyTuesday/Data/2020-11-17/chart.png', scale_factor=2.0)
def plot_planning(planning, need, timeline):
    """Plot the optimized production schedule against the requirement.

    Builds two interactive Altair bar charts — the hourly requirement
    (`need`) and the optimized planning (`planning`) — saves the combined
    chart to 'planning_time_model3.html' and publishes it as a datapane
    report.

    Args:
        planning: DataFrame whose index contains rows labelled like
            "Total hours[<date>,<line>]" with a 'Solution' column of hours.
        need: DataFrame of required hours indexed by date (hours in
            column 0).
        timeline: sequence of dates; only its length is used, to size the
            chart columns.
    """
    # Plot graph - Requirement
    source = need.copy()
    source = source.rename(columns={0: "Hours"})
    source["Date"] = source.index
    bars_need = (
        alt.Chart(source)
        .mark_bar()
        .encode(
            y="Hours:Q",
            column=alt.Column("Date:N"),
            tooltip=["Date", "Hours"],
        )
        .interactive()
        .properties(
            width=550 / len(timeline) - 22,
            height=75,
            title='Requirement',
        )
    )
    # Plot graph - Optimized planning
    source = planning.filter(like="Total hours", axis=0).copy()
    source["Date"] = list(source.index.values)
    source = source.rename(columns={"Solution": "Hours"}).reset_index()
    # Index labels look like "Total hours[<date>,<line>]": split them into
    # separate Date and Line columns.
    source[["Date", "Line"]] = source["Date"].str.split(",", expand=True)
    source["Date"] = source["Date"].str.split("[").str[1]
    source["Line"] = source["Line"].str.split("]").str[0]
    source["Min capacity"] = 7
    source["Max capacity"] = 12
    source = source.round({"Hours": 1})
    # Load expressed as a percentage of a nominal 8-hour shift.
    source["Load%"] = pd.Series(
        ["{0:.0f}%".format(val / 8 * 100) for val in source["Hours"]],
        index=source.index,
    )
    bars = (
        alt.Chart(source)
        .mark_bar()
        .encode(
            x="Line:N",
            y="Hours:Q",
            column=alt.Column("Date:N"),
            color="Line:N",
            tooltip=["Date", "Line", "Hours", "Load%"],
        )
        .interactive()
        .properties(
            width=550 / len(timeline) - 22,
            height=150,
            title="Optimized Production Schedule",
        )
    )
    chart = alt.vconcat(bars, bars_need)
    chart.save("planning_time_model3.html")
    # BUG FIX: the keyword was misspelled "visibily", which raises a
    # TypeError from datapane's publish() (expects `visibility`).
    dp.Report(dp.Plot(chart, caption="Production schedule model 3 - Time")).publish(
        name="Optimized production schedule model 3 - Time",
        description="Optimized production schedule model 3 - Time",
        open=True,
        visibility="PUBLIC",
    )
def altair_frozen_weights_performance_ridge_plot(data,
                                                 xaxis_title="Dev Metric",
                                                 title_main="Dense Variably Unfrozen",
                                                 task_name="MSR",
                                                 step_all=75,
                                                 width_all=600,
                                                 step_small=30,
                                                 width_small=400,
                                                 overlap=1,
                                                 max_bins=30,
                                                 color_scheme='redyellowblue',
                                                 return_all=True):
    """Ridge (joy) plot of dev-metric distributions per frozen-weights bin.

    Builds one faceted ridge plot over all epochs and, when `return_all` is
    True, four per-epoch ridge plots (epochs 1-4) arranged in a 2x2 grid
    beside it.

    Args:
        data: DataFrame containing the columns 'Frozen Weights Pct',
            'Epoch' and 'Dev Metric'.
        xaxis_title: label of the shared x axis.
        title_main: main chart title.
        task_name: task label used in the subtitle.
        step_all: row height of the all-epochs plot.
        width_all: width of the all-epochs plot.
        step_small: row height of each per-epoch plot.
        width_small: width of each per-epoch plot.
        overlap: vertical overlap factor between ridge rows.
        max_bins: maximum number of histogram bins.
        color_scheme: Vega color scheme used for the area fill.
        return_all: when False, return only the all-epochs plot.

    Returns:
        An Altair chart: the configured all-epochs plot, or an hconcat of
        the per-epoch grid and the all-epochs plot.
    """
    assert type(data) is pd.core.frame.DataFrame, "Parameter `data` must be of type pandas.core.frame.DataFrame."
    assert all(e in data.columns.to_list() for e in ['Frozen Weights Pct', 'Epoch', 'Dev Metric']), \
        "Parameter `data` must contain the following columns: ['Frozen Weights Pct', 'Epoch', 'Dev Metric']."

    # generate the combined epochs plot
    domain_ = [min(data['Dev Metric']), max(data['Dev Metric'])]

    c0 = alt.Chart(data, height=step_all)\
        .transform_joinaggregate(mean_acc='mean(Dev Metric)', groupby=['Frozen Weights Pct'])\
        .transform_bin(['bin_max', 'bin_min'], 'Dev Metric', bin=alt.Bin(maxbins=max_bins))\
        .transform_aggregate(value='count()', groupby=['Frozen Weights Pct', 'mean_acc', 'bin_min', 'bin_max'])\
        .transform_impute(impute='value', groupby=['Frozen Weights Pct', 'mean_acc'], key='bin_min', value=domain_[0])\
        .mark_area(interpolate='monotone', fillOpacity=0.8, stroke='lightgray', strokeWidth=0.5)\
        .encode(
            alt.X('bin_min:Q', bin='binned', title=xaxis_title, scale=alt.Scale(domain=domain_)),
            alt.Y('value:Q', scale=alt.Scale(range=[step_all, -step_all * overlap]), axis=None),
            alt.Fill('mean_acc:Q', legend=None, scale=alt.Scale(domain=[sum(x) for x in zip(domain_[::-1], [-0.05, 0.05])], scheme=color_scheme)))\
        .properties(width=width_all, height=step_all)\
        .facet(
            row=alt.Row(
                'Frozen Weights Pct:O',
                # BUG FIX: label read 'Forzen Weights Pct (Binned)'.
                title='Frozen Weights Pct (Binned)',
                header=alt.Header(
                    labelAngle=0,
                    labelAlign='right',
                    labelFontSize=15,
                    labelFont='Lato',
                    labelColor=berkeley_palette['pacific'],
                    titleFontSize=20
                )
            )
        ).properties(title={'text': title_main, 'subtitle': " ".join([task_name, "- All Epochs"])}, bounds='flush')

    # if not returning all plots, then return the main "All Epochs" plot
    if not (return_all):
        return c0.configure_facet(spacing=0).configure_view(stroke=None).configure_title(anchor='middle')

    # generate the individual epochs plots (epochs 1..4)
    subplots = [None] * 4
    for i in range(1, 5):
        domain_ = [min(data[(data['Epoch'] == i)]['Dev Metric']), max(data[(data['Epoch'] == i)]['Dev Metric'])]
        o = alt.Chart(data[(data['Epoch'] == i)], height=step_small)\
            .transform_joinaggregate(mean_acc='mean(Dev Metric)', groupby=['Frozen Weights Pct'])\
            .transform_bin(['bin_max', 'bin_min'], 'Dev Metric', bin=alt.Bin(maxbins=max_bins))\
            .transform_aggregate(value='count()', groupby=['Frozen Weights Pct', 'mean_acc', 'bin_min', 'bin_max'])\
            .transform_impute(impute='value', groupby=['Frozen Weights Pct', 'mean_acc'], key='bin_min', value=domain_[0])\
            .mark_area(interpolate='monotone', fillOpacity=0.8, stroke='lightgray', strokeWidth=0.5)\
            .encode(
                alt.X('bin_min:Q', bin='binned', title=xaxis_title, scale=alt.Scale(domain=domain_)),
                alt.Y('value:Q', scale=alt.Scale(range=[step_small, -step_small * overlap]), axis=None),
                alt.Fill('mean_acc:Q', legend=None, scale=alt.Scale(domain=[sum(x) for x in zip(domain_[::-1], [-0.05, 0.05])], scheme=color_scheme)))\
            .properties(width=width_small, height=step_small)\
            .facet(
                row=alt.Row(
                    'Frozen Weights Pct:O',
                    # BUG FIX: label read 'Forzen Weights Pct (Binned)'.
                    title='Frozen Weights Pct (Binned)',
                    header=alt.Header(
                        labelAngle=0,
                        labelAlign='right',
                        labelFontSize=15,
                        labelFont='Lato',
                        labelColor=berkeley_palette['pacific'],
                        titleFontSize=20
                    )
                )
            ).properties(title={'text': title_main, 'subtitle': " ".join([task_name, "- Epoch", str(i)])}, bounds='flush')
        subplots[i - 1] = o

    # 2x2 grid of per-epoch plots beside the all-epochs plot
    viz = alt.hconcat(alt.vconcat(alt.hconcat(subplots[0], subplots[1]), alt.hconcat(subplots[2], subplots[3])), c0)\
        .configure_facet(spacing=0)\
        .configure_view(stroke=None)\
        .configure_title(anchor='middle')

    return viz
=================== This chart shows an example of using an interval selection to filter the contents of an attached histogram, allowing the user to see the proportion of items in each category within the selection. """ # category: interactive charts import altair as alt from vega_datasets import data source = data.cars() brush = alt.selection(type='interval') points = alt.Chart().mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.condition(brush, 'Origin:N', alt.value('lightgray')) ).add_selection( brush ) bars = alt.Chart().mark_bar().encode( y='Origin:N', color='Origin:N', x='count(Origin):Q' ).transform_filter( brush ) alt.vconcat(points, bars, data=source)
color=alt.condition(brush, color, alt.value('lightgray')), size=alt.Size('precipitation:Q', scale=alt.Scale(range=[5, 200])) ).properties( width=600, height=300 ).add_selection( brush ).transform_filter( click ) # Bottom panel is a bar chart of weather type bars = alt.Chart().mark_bar().encode( x='count()', y='weather:N', color=alt.condition(click, color, alt.value('lightgray')), ).transform_filter( brush ).properties( width=600, ).add_selection( click ) alt.vconcat( points, bars, data=source, title="Seattle Weather: 2012-2015" )