def plot_forces(variables: pd.DataFrame) -> None:
    """Render KDE facet grids of the force variables in Streamlit.

    ``Height`` and ``Weight`` are dropped, the remaining columns are melted
    and every column name is split on whitespace into a ``type`` and a
    ``variable`` part.  Rows of type ``Imb`` get their own facet grid, which
    is placed to the right of the grid holding all other types.
    """
    long_forces = variables.drop(["Height", "Weight"], axis=1).melt()
    long_forces[["type", "variable"]] = (
        long_forces["variable"].str.split(expand=True)
    )
    tables.describe_table(
        long_forces, groupby=["variable", "type"], description="variables"
    )

    shared_row = dict(shorthand="variable", title=None, sort=forces_order)
    type_column = alt.Column("type", title=None)

    def _facet_grid(condition, header):
        # One KDE grid for the rows of ``long_forces`` matching ``condition``.
        grid = plot_kde().facet(
            data=long_forces.query(condition),
            row=alt.Row(header=header, **shared_row),
            column=type_column,
        )
        return grid.resolve_scale(y="independent").properties(bounds="flush")

    forces_plot = _facet_grid(
        "type != 'Imb'", alt.Header(labelAngle=0, labelAlign="left")
    )
    # Font size 0 hides the row labels of the right-hand grid.
    imb_plot = _facet_grid("type == 'Imb'", alt.Header(labelFontSize=0))

    st.altair_chart((forces_plot | imb_plot).configure_facet(spacing=5))
def ridge_plot(d, value, groupby, step=30, overlap=0.8, sort=None):
    """Build a ridgeline chart: one binned-count area per ``groupby`` level.

    ``value`` is binned and counted per group; missing bins are imputed as
    zero.  Each area is filled by the group's mean of ``value`` on a
    "redyellowblue" scale whose domain runs from the column maximum to the
    column minimum.  ``step`` is the row height and ``overlap`` controls how
    far an area may extend above its own row.  ``sort`` optionally fixes the
    row order.
    """
    row_sort = alt.SortArray(sort) if sort else None
    fill_scale = alt.Scale(
        domain=[d[value].max(), d[value].min()], scheme="redyellowblue"
    )

    binned_counts = (
        alt.Chart(d)
        .transform_joinaggregate(
            mean_value=f"mean({value})", groupby=[groupby]
        )
        .transform_bin(["bin_max", "bin_min"], value)
        .transform_aggregate(
            value="count()",
            groupby=[groupby, "mean_value", "bin_min", "bin_max"],
        )
        .transform_impute(
            impute="value",
            groupby=[groupby, "mean_value"],
            key="bin_min",
            value=0,
        )
    )

    ridges = binned_counts.mark_area(
        interpolate="monotone",
        fillOpacity=0.8,
        stroke="lightgray",
        strokeWidth=0.5,
    ).encode(
        x=alt.X(
            "bin_min:Q",
            bin="binned",
            title='activation',
            axis=alt.Axis(format='%', labelFlush=False),
        ),
        # A negative upper range bound lets the area spill into the row above.
        y=alt.Y(
            "value:Q",
            scale=alt.Scale(range=[step, -step * overlap]),
            axis=None,
        ),
        fill=alt.Fill("mean_value:Q", legend=None, scale=fill_scale),
        row=alt.Row(
            f"{groupby}:N",
            title=None,
            sort=row_sort,
            header=alt.Header(labelAngle=0, labelAlign="right", format="%B"),
        ),
    )

    ridges = ridges.properties(bounds="flush", height=step)
    return ridges.configure_facet(spacing=0).configure_view(stroke=None)
def price_subplot(df, color='Category', color_sort_order=['Clothes', 'Accessory'], color_scale='tableau10', price_scale=alt.Scale()):
    """Return a jittered price strip plot faceted by ``Year``.

    Each facet shows one circle per row (x is random jitter, y is ``Price``)
    coloured by the ``color`` column, layered over a red rule drawn at the
    median of the ``Price-2019`` column.
    """
    color_options = {"scale": alt.Scale(scheme=color_scale)}
    if color_sort_order:
        color_options['sort'] = color_sort_order

    base = alt.Chart(df, width=50)

    jitter_x = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    points = base.mark_circle(size=12, opacity=0.7).encode(
        x=jitter_x,
        y=alt.Y("Price:Q", axis=alt.Axis(), scale=price_scale),
        color=alt.Color(f'{color}:N', **color_options),
        tooltip=['Product', 'Price', 'Price-2019'],
    ).transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
    )

    median_rule = base.mark_rule(color='red', size=2).encode(
        y=alt.Y("median(Price-2019):Q")
    )

    year_header = alt.Header(
        labelAngle=-90,
        titleOrient='top',
        labelOrient='bottom',
        labelAlign='right',
        labelPadding=3,
    )
    layered = median_rule + points
    return layered.facet(column=alt.Column('Year:O', header=year_header))
def dur_dist_plot(dur_dist, to_json_for_lab=None):
    """Return violin-style density plots of ``duration``, one per cluster.

    When ``to_json_for_lab`` is given it is registered and enabled as the
    Altair data transformer named "json" before the chart is built.
    """
    if to_json_for_lab is not None:
        alt.data_transformers.register("json", to_json_for_lab)
        alt.data_transformers.enable("json")

    densities = alt.Chart(dur_dist).transform_density(
        "duration",
        as_=["duration", "density"],
        extent=[0, 70],
        groupby=["cluster"],
    )

    # Centre-stacked horizontal areas give the mirrored violin outline.
    density_x = alt.X(
        "density:Q",
        stack="center",
        impute=None,
        title=None,
        axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
    )
    cluster_column = alt.Column(
        "cluster:N",
        header=alt.Header(
            titleOrient="bottom",
            labelOrient="bottom",
            labelPadding=0,
        ),
    )

    violins = densities.mark_area(orient="horizontal").encode(
        y="duration:Q",
        color="cluster:N",
        x=density_x,
        column=cluster_column,
    )
    violins = violins.properties(width=100)
    return violins.configure_facet(spacing=0).configure_view(stroke=None)
def altairPlot():
    """Return a jittered strip plot of IMDB rating per major genre.

    The data comes from the ``movies`` URL of ``vega_datasets``; each genre
    gets its own column facet and colour.
    """
    import altair as alt
    from vega_datasets import data

    movies_url = data.movies.url

    jitter_x = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    genre_column = alt.Column(
        'Major_Genre:N',
        header=alt.Header(
            labelAngle=-90,
            titleOrient='top',
            labelOrient='bottom',
            labelAlign='right',
            labelPadding=3,
        ),
    )

    points = alt.Chart(movies_url, width=80).mark_circle(size=8).encode(
        x=jitter_x,
        y=alt.Y('IMDB_Rating:Q'),
        color=alt.Color('Major_Genre:N', legend=None),
        column=genre_column,
    )
    jittered = points.transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
    )
    return jittered.configure_facet(spacing=0).configure_view(stroke=None)
def bv_violinPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Draw violin plots of two columns with seaborn or Altair.

    Parameters
    ----------
    data:
        DataFrame holding the columns ``plotY`` and ``plotX1``; they are
        renamed to ``ylabel1`` and ``ylabel2`` on a copy, the caller's frame
        is not modified.
    engine:
        ``'Static'`` renders with seaborn and returns a Panel Matplotlib
        pane; ``'Interactive'`` returns an Altair chart.
    xlabel:
        Not used by this function; kept so existing callers keep working.
    ylabel1, ylabel2:
        Display names for the two plotted columns.

    Returns
    -------
    The pane or chart described above, or ``None`` for any other ``engine``.
    """
    data = data.rename(columns={'plotY': ylabel1, 'plotX1': ylabel2})
    data = data[[ylabel1, ylabel2]].copy()

    if engine == 'Static':
        plt.rcParams['figure.figsize'] = (9, 6)
        ax = sns.violinplot(x='variable', y='value', data=data.melt())
        # The visibility flag is passed positionally: its keyword was renamed
        # from ``b`` to ``visible`` in newer matplotlib releases, so the
        # positional form is the only spelling accepted by all versions.
        ax.grid(True, which='major', color='k', linewidth=0.25)
        ax.grid(True, which='minor', color='k', linewidth=0.125)
        plt.close()
        return pn.pane.Matplotlib(ax.figure, tight=True)
    elif engine == 'Interactive':
        p = alt.Chart(data.dropna().melt())
        p = p.transform_density(
            'value', as_=['value', 'density'], groupby=['variable'])
        # Centre-stacked horizontal areas give the mirrored violin outline.
        p = p.mark_area(orient='horizontal').encode(
            y=alt.Y('value:Q', axis=alt.Axis(format='~s')),
            color='variable:N',
            x=alt.X(
                'density:Q',
                stack='center',
                impute=None,
                title=None,
                axis=alt.Axis(
                    labels=False, values=[0], grid=False, ticks=True),
            ),
            column=alt.Column(
                'variable:N',
                header=alt.Header(
                    titleOrient='bottom',
                    labelOrient='bottom',
                    labelPadding=0,
                ),
            ),
        )
        p = p.properties(width=200, height=280)
        p = p.configure_facet(spacing=0)
        p = p.configure_view(stroke=None)
        return p
def punchcode():
    """Return the JSON spec of a punch-card chart of workshop contributions.

    Works on a copy of the module-level ``df``.  Workshops are bucketed into
    'Corporate' and 'Public', and the circle size is the summed product of
    ``workshop_hours`` and ``class_size`` per month and ``name``.  Rows whose
    ``name`` is 'Capstone' are excluded.
    """
    def _bucket(category):
        # Everything that is not exactly 'Corporate' counts as 'Public'.
        return 'Corporate' if category == 'Corporate' else 'Public'

    dat = df.copy()
    dat['mnth_yr'] = dat['workshop_start'].dt.to_period('M').astype(str)
    dat['workshop_category'] = dat['workshop_category'].apply(_bucket)
    dat['contrib'] = dat['workshop_hours'] * dat['class_size']

    category_header = alt.Header(
        titleColor='#bbc6cbe6',
        labelColor='#bbc6cbe6',
        labelAngle=30,
        titleFontSize=40,
        titleAngle=30,
    )
    without_capstone = dat[dat.name != 'Capstone']

    chart = alt.Chart(without_capstone).mark_circle(color='#bbc6cbe6').encode(
        x=alt.X('mnth_yr:T', axis=alt.Axis(title='')),
        y='name:O',
        size=alt.Size('sum(contrib):Q', legend=None),
        column=alt.Column(
            'workshop_category:O',
            title=None,
            sort="descending",
            header=category_header,
        ),
    )
    chart = chart.properties(width=300, height=320).configure_axis(
        labelColor='#bbc6cbe6', titleColor='#bbc6cbe6', grid=False
    )
    return chart.to_json()
def homeAdv():
    """Return the JSON spec of a home-vs-away average points bar chart.

    Reads "App/Data/homeadvantage.csv" and draws one row of bars per league,
    coloured by ``team_flag``.
    """
    adv = pd.read_csv("App/Data/homeadvantage.csv")

    palette = alt.Scale(
        domain=['Home Team', 'Away Team'], range=["#5bc0de", "#d9534f"]
    )
    league_row = alt.Row(
        'league:N',
        title='',
        sort=alt.EncodingSortField("points", op='max', order='descending'),
        header=alt.Header(labelAngle=0, labelAlign='left'),
    )

    bars = alt.Chart(adv, height=500, width=1000).mark_bar().encode(
        x=alt.X('points:Q', title='Average points'),
        y=alt.Y(
            'team_flag:N', sort='-y', title='', axis=alt.Axis(labels=False)
        ),
        color=alt.Color('team_flag:N', scale=palette, title=''),
        row=league_row,
        tooltip=[alt.Tooltip('points:Q', format='.2f')],
    )
    # The height given to alt.Chart above is replaced here by 25.
    bars = bars.properties(height=25)
    bars = bars.configure_view(stroke='transparent')
    bars = bars.configure_axis(grid=False)
    return bars.interactive().to_json()
def cbo_bar_chart(cbo_data, var, title, bar_width=30, width=600, height=250):
    """
    Build a bar chart comparing the current and new CBO projections

    Parameters
    ----------
    cbo_data: data containing both current and new CBO projections
        concatenated together
    var: Y-axis variable
    title: title of the chart
    bar_width: width of the bars in the plot
    width: total width of the chart, shared by all facets
    height: height of the chart
    """
    # Every facet (one per distinct "index" value) gets an equal share of
    # the total width.
    facet_count = cbo_data["index"].nunique()
    facet_width = width / facet_count

    projection_axis = alt.Axis(
        title=None, labels=False, ticks=False, labelFontSize=15
    )
    bars = alt.Chart(cbo_data, title=title).mark_bar(width=bar_width).encode(
        x=alt.X("Projections", axis=projection_axis),
        y=alt.Y(var, axis=alt.Axis(labelFontSize=10, titleFontSize=15)),
        color=alt.Color("Projections"),
        column=alt.Column(
            "index", header=alt.Header(title=None, labelOrient="bottom")
        ),
    )
    bars = bars.properties(height=height, width=facet_width)
    bars = bars.configure_view(stroke="transparent")
    bars = bars.configure_facet(spacing=0)
    return bars.configure_title(fontSize=20)
def player_roll_chart(self):
    """Return a bar chart of roll counts per player, one column per roll.

    Reads ``self.player_count`` (rounded to two decimals for plotting),
    ``self.player_names`` / ``self.player_colors`` for the colour scale and
    ``self.screen_width`` for the facet width.
    """
    plot_frame = self.player_count.round(2)

    player_colors = alt.Scale(
        domain=self.player_names, range=self.player_colors
    )
    roll_column = alt.Column(
        "Roll:N",
        header=alt.Header(title=None, labelOrient="bottom", labelFontSize=22),
    )

    bars = alt.Chart(plot_frame).mark_bar(
        strokeWidth=0.5, stroke="black"
    ).encode(
        x=alt.X(
            "Player:O", axis=alt.Axis(title=None, labels=False, ticks=False)
        ),
        y='Count:Q',
        color=alt.Color('Player:N', scale=player_colors, legend=alt.Legend()),
        column=roll_column,
        tooltip=list(self.player_count.columns),
    )

    bars = bars.configure_view(strokeWidth=0)
    bars = bars.configure_title(
        fontSize=32,
        limit=800,
        dx=45,
        dy=-50,
        font="Arial",
        align="center",
        anchor="middle",
    )
    bars = bars.configure_legend(
        strokeColor="black",
        padding=10,
        orient="bottom",
        cornerRadius=10,
        direction="horizontal",
        labelFontSize=10,
    )
    bars = bars.properties(
        title="Roll Count by Player", width=self.screen_width / 45
    )
    return bars.configure_axis(
        grid=False, labelFontSize=14, titleFontSize=16
    )
def plot_shap_values(X: pd.DataFrame, model: dict) -> None:
    """Render a jittered SHAP strip plot for the "EB mean force" model.

    SHAP values are computed with ``shap.TreeExplainer`` for
    ``model["EB mean force"]``.  The six features with the largest mean
    absolute SHAP value are shown, one row each, ordered by that importance.
    Points are coloured by the feature's z-score, clipped to [-0.5, 0.5].

    Parameters
    ----------
    X:
        Feature matrix passed both as explainer background data and as the
        rows to explain.
    model:
        Mapping from target name to fitted model; only the
        "EB mean force" entry is read.

    Returns
    -------
    None.  The chart is sent to Streamlit with ``st.altair_chart``.
    """
    target = "EB mean force"
    explainer = shap.TreeExplainer(model[target], data=X)
    shap_values = pd.DataFrame(explainer.shap_values(X), columns=X.columns)

    # Keep the six most influential features, most important first.
    y_order = shap_values.abs().mean().nlargest(6).index.to_list()
    shap_values = shap_values[y_order].melt()

    # Both frames are melted in the same column order, so the z-scores line
    # up row by row with the melted SHAP values.
    top_features = X[y_order]
    z_scores = (top_features - top_features.mean()) / top_features.std()
    shap_values["Z-score"] = z_scores.melt()["value"].clip(-0.5, 0.5)

    # NOTE(review): ``width`` is not defined in this function; presumably a
    # module-level constant -- confirm.
    stripplot = alt.Chart(shap_values, height=20, width=width).mark_circle(
        size=100, clip=True
    ).encode(
        alt.Y(
            'jitter:Q',
            title=None,
            axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
        ),
        alt.X(
            'value', title="Shap value", scale=alt.Scale(domain=[-.4, .4])
        ),
        alt.Color(
            "Z-score", scale=alt.Scale(scheme="redblue", domain=[-0.5, 0.5])
        ),
        alt.Row(
            'variable',
            title=None,
            sort=y_order,
            header=alt.Header(labelAngle=0, labelAlign='left'),
        ),
    ).transform_calculate(
        # Gaussian jitter (Box-Muller) to spread points within each row.
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
    ).configure_facet(spacing=0).configure_view(stroke=None)

    st.altair_chart(stripplot)
def participant_count_plot_live(data):
    """Return a faceted bar chart of participants assigned per treatment.

    Rows are de-duplicated on (normalized start date, treatment, ROWID) and
    counted per day and treatment.  Days after 2021-04-05 are attributed to
    the 'XLab' source, all others to 'Amazon'; the counts are then summed
    per treatment and source.  The chart has one row facet per source with
    a horizontal bar and a white count label per treatment.

    Parameters
    ----------
    data:
        DataFrame with the columns 'Start Date' (datetime), 'Treatment' and
        'ROWID'.

    Returns
    -------
    The configured Altair facet chart.
    """
    df2 = data[['Start Date', 'Treatment', 'ROWID']].copy()
    df2['Start Date'] = df2['Start Date'].dt.normalize()
    df2 = (
        df2.drop_duplicates()
        .groupby(by=['Start Date', 'Treatment'])
        .agg({'ROWID': 'count'})
        .reset_index()
    )
    df2.columns = ['date', 'branch', 'total']

    df2['source'] = 'Amazon'
    df2.loc[(df2.date > '2021-04-05'), 'source'] = 'XLab'
    df2 = (
        df2.groupby(by=['branch', 'source'])
        .agg({'total': 'sum'})
        .reset_index()
        .rename(columns={'branch': 'treatment'})
    )

    base = alt.Chart().mark_bar().encode(
        x=alt.X(
            'total:Q',
            axis=alt.Axis(
                title='Participants Assigned',
                labelPadding=10,
                labelFontSize=20,
                titleFontSize=25,
            ),
        ),
        # The y channel is built with alt.Y (the original used alt.X here).
        y=alt.Y(
            'treatment:O',
            axis=alt.Axis(
                title='',
                labelAngle=0,
                labelPadding=10,
                labelFontSize=20,
                titleFontSize=25,
            ),
            sort=['Control', 'Typographical', 'Phonological'],
        ),
        color=alt.Color(
            'treatment:O',
            legend=None,
            scale=alt.Scale(range=[
                berkeley_palette['pacific'],
                berkeley_palette['berkeley_blue'],
                berkeley_palette['founders_rock'],
            ]),
        ),
    ).properties(width=650, height=150)

    txt = base.mark_text(dx=-15, size=15).encode(
        text='total:Q', color=alt.value('white')
    )

    layered = alt.layer(base, txt).properties(
        width=600, height=150, title={'text': ''}
    )
    faceted = layered.facet(
        row=alt.Row(
            'source:N',
            sort=alt.SortArray(['XLab', 'Amazon']),
            header=alt.Header(
                labelColor=berkeley_palette['pacific'],
                labelFontSize=25,
                labelFont='Lato',
                title='',
            ),
        ),
        data=df2,
        title='Live Study Participation',
    )

    # Each configure_title() call replaces the whole title config, so the
    # anchor and the offset must be set in a single call; two separate calls
    # would keep only the last one.
    return (
        faceted
        .configure(padding={'top': 20, 'left': 20, 'right': 20, 'bottom': 20})
        .configure_facet(spacing=10)
        .configure_view(stroke=None)
        .configure_axis(grid=False)
        .configure_title(anchor='middle', dy=-20)
    )
def income_expenses_over_time(df_orig):
    """Show income and expenses per time interval in a Streamlit page.

    The sidebar offers the aggregation interval, an option to invert the
    sign of "Income" and the plotting backend (pyplot, altair or bokeh).
    Data is aggregated with ``time_interval_aggregation`` and summed per
    ``Account_L0`` before it is drawn with the selected backend.
    """
    # Time interval aggregation level
    time_interval = st.sidebar.radio(
        "Time interval:", ("Month", "Quarter", "Year"), index=1
    )
    dfn, _n_levels = time_interval_aggregation(df_orig, time_interval)

    if st.sidebar.checkbox('Invert sign of "Income"', value=True):
        dfn.loc["Income", :] = -dfn.loc["Income", :].values

    st.subheader('Income and Expenses over Time')
    plot_type = st.sidebar.selectbox(
        'Plot type', ["pyplot", "altair", "bokeh"], key="plot_type"
    )

    df_L0 = dfn.groupby(["Account_L0"]).sum().transpose().reset_index()
    df_L0.columns.name = "Account"

    if plot_type == "pyplot":
        fig = plt.figure(figsize=(14, 5))
        ax = plt.axes()
        df_L0.plot.bar(
            ax=ax,
            x=time_interval,
            y=["Income", "Expenses"],
            xlabel=time_interval,
            ylabel=df_L0["level_0"][0],
            rot=90,
        )
        ax.locator_params(axis="x", tight=True, nbins=40)
        st.pyplot(fig)

    elif plot_type == "altair":
        n_intervals = df_L0.shape[0]
        # The first top-level column label doubles as value column and title.
        value_name = dfn.columns.levels[0][0]
        long_frame = (
            df_L0.drop(columns="level_0")
            .set_index(time_interval)
            .stack()
            .reset_index()
            .rename(columns={0: value_name})
        )
        custom_spacing = 2
        value_field = f'{value_name}:Q'

        interval_column = alt.Column(
            time_interval,
            spacing=custom_spacing,
            header=alt.Header(
                title="Income and Expenses",
                labelOrient='bottom',
                labelAlign='right',
                labelAngle=-90,
            ),
        )
        tooltips = [
            alt.Tooltip('Account:O', title='Account'),
            alt.Tooltip(value_field, title=value_name),
            alt.Tooltip(f'{time_interval}:N', title=time_interval),
        ]
        chart = alt.Chart(long_frame).mark_bar().encode(
            column=interval_column,
            x=alt.X(
                'Account:O',
                axis=alt.Axis(title=None, labels=False, ticks=False),
            ),
            y=alt.Y(value_field, title=value_name, axis=alt.Axis(grid=False)),
            color=alt.Color(
                'Account', scale=alt.Scale(range=['#EA98D2', '#659CCA'])
            ),
            tooltip=tooltips,
        ).properties(
            width=(700 - n_intervals * custom_spacing) / n_intervals
        )
        st.altair_chart(chart, use_container_width=False)

    elif plot_type == "bokeh":
        accounts = ["Income", "Expenses"]
        factors = [
            (interval, account)
            for interval in df_L0[time_interval]
            for account in accounts
        ]
        # Interleave income and expense values to match ``factors``.
        counts = tuple(
            amount
            for pair in zip(df_L0['Income'], df_L0['Expenses'])
            for amount in pair
        )
        source = ColumnDataSource(data=dict(x=factors, counts=counts))

        p = figure(
            x_range=FactorRange(*factors),
            plot_height=450,
            plot_width=900,
            title="Income and Expenses",
            toolbar_location="above",
            tooltips=[("Period, Account", "@x"), ("Value", "@counts")],
        )
        p.vbar(x='x', top='counts', width=0.9, source=source)
        p.y_range.start = 0
        p.x_range.range_padding = 0.5
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        st.bokeh_chart(p)

    return
def source_vs_hour_chart(
    base: alt.Chart, sensor_unit: str, max_absolute_error: float, faceted: bool = False
) -> Union[alt.Chart, alt.FacetChart]:
    """Return a heat map of mean absolute error per hour of day and source.

    The error is averaged on the fly per ``event_start`` and ``source`` and
    coloured on a "redyellowgreen" scale running from ``max_absolute_error``
    down to 0; cells outside the time-selection brush get the idle colour.
    With ``faceted=True`` each source becomes a row facet, otherwise the
    source is encoded on the y axis.
    """
    error_color = alt.Color(
        "accuracy:Q",
        scale=alt.Scale(
            domain=(max_absolute_error, 0), scheme="redyellowgreen"
        ),
        title="Error",
    )
    hour_axis = alt.X(
        "event_start:O",
        timeUnit="hours",
        axis=alt.Axis(domain=False, ticks=False, labelAngle=0),
        scale=alt.Scale(domain=list(range(24))),
        title="Hour of day",  # "UTC hour of day"
    )
    tooltips = [
        alt.Tooltip("event_start:T", timeUnit="hours", title="Hour of day"),
        alt.Tooltip(
            "accuracy:Q",
            title="Mean absolute error (%s)" % sensor_unit,
            format=".2f",
        ),
    ]

    heat_map = base.mark_rect().transform_joinaggregate(
        on_the_fly_mae="mean(mae)",
        on_the_fly_reference="mean(reference_value)",
        groupby=["event_start", "source"],
    )
    heat_map = heat_map.transform_calculate(
        accuracy=alt.datum.on_the_fly_mae
    )
    heat_map = heat_map.encode(
        x=hour_axis,
        color=alt.condition(
            selectors.time_selection_brush,
            error_color,
            alt.value(selectors.idle_color),
        ),
        tooltip=tooltips,
    )

    if faceted:
        heat_map = heat_map.facet(
            row=alt.Row(
                "source:O", title=None, header=alt.Header(labelAngle=0)
            )
        )
    else:
        source_axis = alt.Axis(
            domain=False, ticks=False, labelAngle=0, labelPadding=5
        )
        heat_map = heat_map.encode(
            y=alt.Y("source:O", axis=source_axis, title=None)
        )

    chart_title = alt.TitleParams(
        "Model performance given a time of day", anchor="middle"
    )
    return heat_map.properties(title=chart_title)
def plot(data):
    """
    Takes in a Dataframe data containing information about the
    population, educational attainment, and internet access for
    U.S. counties. Plots a grouped bar chart comparing 5 urban and
    5 rural counties, and the relationship between attaining a
    Bachelor's Degree and lacking internet access for these counties.
    The chart is saved to 'q4_chart.html'.
    """
    urban_counties = [
        'New York County', 'Los Angeles County', 'Cook County',
        'Harris County', 'Maricopa County',
    ]
    rural_counties = [
        'Chaves County', 'Aroostook County', 'Clallam County',
        'McCracken County', 'St. Landry Parish',
    ]
    counties = urban_counties + rural_counties

    data = clean(data, counties)
    data = calculate_percentage(data)

    # Subtitle text doubles as an Urban/Rural caption above the facets.
    chart_title = {
        'text': [
            'Internet Access and Education Attainment in Urban vs. ' +
            'Rural Counties (2016)'
        ],
        'subtitle': [
            '',
            '. Urban Urban Urban ' + ' Urban Urban Rural Rural' +
            ' Rural Rural Rural'
        ],
        'subtitlePadding': 10,
    }
    county_header = alt.Header(
        titleOrient='bottom',
        labelOrient='bottom',
        labelAngle=-90,
        labelPadding=90,
        labelBaseline='middle',
    )

    # Plot
    bars = alt.Chart(data).mark_bar().encode(
        x=alt.X(
            'Statistic',
            type='nominal',
            sort=counties,
            title=None,
            axis=alt.Axis(labels=False),
        ),
        y='Percentage:Q',
        color=alt.Color(
            'Statistic:N',
            scale=alt.Scale(range=['#96ceb4', '#ffcc5c']),
            title=None,
        ),
        column=alt.Column('County:N', sort=counties, header=county_header),
    )
    q4_chart = (
        bars.properties(title=chart_title)
        .configure_title(fontSize=18, orient='top', offset=12, anchor='start')
        .configure_axisX(labelPadding=100)
    )
    q4_chart.save('q4_chart.html')
def faceted_bar_chart(
        df: pd.DataFrame,
        xcol: str,
        xtitle: str,
        ycol: str,
        ytitle: str,
        colorcol: str,
        textcol: str,
        title: str,
        columncol: str,
        legend_title: str = "Hardware") -> alt.FacetChart:
    """
    Method that outputs a raw faceted bar chart.

    This does not process the input df, so it has to come already processed.

    Parameters
    ----------
    df: pd.DataFrame
        dataframe from which the bar chart will be created.
    xcol: str
        dataframe column name that will be used for the x axis of the plot.
    xtitle: str
        title of the x-axis.
    ycol: str
        dataframe column name that will be used for the y axis of the plot.
        Its values are also the numbers printed next to each bar.
    ytitle: str
        title of the y-axis.
    colorcol: str
        dataframe column name which holds the separation between colors.
    textcol: str
        currently not read by this function: the bar labels are taken from
        ``ycol``. Kept in the signature for existing callers.
    title: str
        title of the column facet, shown above the facets.
    columncol: str
        dataframe column name which holds the separation between all the
        faceted charts.
    legend_title: str
        title of the color legend.

    Returns
    -------
    alt.FacetChart
        Interactive faceted bar chart created from the input dataframe.
    """
    bars = alt.Chart().mark_bar().encode(
        x=alt.X(xcol + ':N', title=xtitle),
        y=alt.Y(ycol + ':Q', title=ytitle),
        color=alt.Color(colorcol + ':N', title=legend_title),
    )

    text = bars.mark_text(
        angle=270,
        align='left',
        baseline='middle',
        dx=10,  # Nudges text away so it doesn't appear on top of the bar
    ).encode(text=alt.Text(ycol + ':Q', format='.1f'))

    # Data is attached at the layer level so both layers share it when
    # the chart is faceted.
    facet_column = alt.Column(
        columncol + ':N',
        header=alt.Header(labelAngle=-85, labelAlign='right'),
        title=title,
    )
    return alt.layer(bars, text, data=df).facet(
        column=facet_column
    ).interactive()
def grouped_bar_chart(data_frame, x_column, y_column, grouping_column,
                      col_header_type, col_header_format):
    """Return a grouped bar chart with one column facet per group.

    Bars are positioned and coloured by ``x_column`` (the x axis itself is
    hidden).  When ``col_header_type`` is not None, the facet header uses it
    as format type together with ``col_header_format`` as format.
    """
    group_header = alt.Header(labelOrient="bottom")
    wants_formatting = col_header_type is not None
    if wants_formatting:
        group_header.formatType = col_header_type
        group_header.format = col_header_format

    bars = alt.Chart(data_frame).mark_bar()
    return bars.encode(
        x=alt.X(x_column, axis=None),
        y=alt.Y(y_column, axis=alt.Axis(grid=True)),
        color=x_column,
        column=alt.Column(grouping_column, header=group_header),
    )
def generate_ridgeline_plot(data, x_lab_country_name):
    """Generate a monthly ridgeline plot of ``response_ratio``.

    Parameters
    ----------
    data
        input data set; the chart reads its ``date`` and ``response_ratio``
        fields.
    x_lab_country_name
        country name inserted into the x-axis title.

    Returns
    -------
    altair object
        facet chart with one binned-count area per month, filled by the
        month's mean response ratio.
    """
    step = 40
    overlap = 1

    by_month = alt.Chart(data, height=step).transform_timeunit(
        Month='month(date)'
    ).transform_joinaggregate(
        mean_response_ratio='mean(response_ratio)', groupby=['Month']
    )
    binned_counts = by_month.transform_bin(
        ['bin_max', 'bin_min'], 'response_ratio'
    ).transform_aggregate(
        value='count()',
        groupby=['Month', 'mean_response_ratio', 'bin_min', 'bin_max'],
    ).transform_impute(
        impute='value',
        groupby=['Month', 'mean_response_ratio'],
        key='bin_min',
        value=0,
    )

    ridges = binned_counts.mark_area(
        interpolate='monotone',
        fillOpacity=0.8,
        stroke='lightgray',
        strokeWidth=0.5,
    ).encode(
        x=alt.X(
            'bin_min:Q',
            bin='binned',
            title=f'Mean Response Ratio in {x_lab_country_name}',
        ),
        # A negative upper range bound lets areas overlap the row above.
        y=alt.Y(
            'value:Q',
            scale=alt.Scale(range=[step, -step * overlap]),
            axis=None,
        ),
        fill=alt.Fill('mean_response_ratio:Q'),
    )

    month_row = alt.Row(
        'Month:T',
        title=None,
        header=alt.Header(labelAngle=0, labelAlign='right', format='%B'),
    )
    ridgeline_plt = ridges.facet(row=month_row).properties(
        title='', bounds='flush'
    )
    return ridgeline_plt
def plot_error_dist(predictions: pd.DataFrame) -> None:
    """Render KDE facet grids of the MAE and MAPE errors in Streamlit.

    ``predictions`` is melted on ``target`` with the ``MAE`` and ``MAPE``
    columns as values.  Each metric gets its own facet grid with one row
    per target; the MAPE grid is placed to the right of the MAE grid.
    """
    long_errors = predictions.melt(
        id_vars="target", value_vars=["MAE", "MAPE"]
    )
    tables.describe_table(long_errors, groupby=["target", "variable"])

    shared_row = dict(shorthand="target", title=None, sort=forces_order)
    metric_column = alt.Column("variable", title=None)

    def _facet_grid(condition, header):
        # One KDE grid for the rows of ``long_errors`` matching ``condition``.
        grid = plot_kde().facet(
            data=long_errors.query(condition),
            row=alt.Row(header=header, **shared_row),
            column=metric_column,
        )
        return grid.resolve_scale(y="independent").properties(bounds="flush")

    mae = _facet_grid(
        "variable == 'MAE'", alt.Header(labelAngle=0, labelAlign="left")
    )
    # Font size 0 hides the row labels of the right-hand grid.
    mape = _facet_grid("variable == 'MAPE'", alt.Header(labelFontSize=0))

    st.altair_chart((mae | mape).configure_facet(spacing=5))
def strip_plot(df, ordered_cats, name):
    """Make a strip plot comparing topics in different categories.

    One column facet per ``cat_sel`` value (ordered by ``ordered_cats``);
    circles are jittered horizontally, placed by ``ratio`` and sized by
    ``levels``.  ``name`` becomes the chart title.
    """
    jitter_x = alt.X(
        "jitter:Q",
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    category_column = alt.Column(
        "cat_sel:N",
        title="arXiv category",
        sort=ordered_cats,
        header=alt.Header(
            labelFontSize=12,
            labelAngle=270,
            titleOrient="top",
            labelOrient="bottom",
            labelAlign="center",
            labelPadding=25,
        ),
    )

    circles = alt.Chart(df).mark_circle(
        size=14, stroke="grey", strokeWidth=0.5
    ).encode(
        x=jitter_x,
        y=alt.Y("ratio:Q", title="Specialisation"),
        tooltip=["index"],
        size=alt.Size("levels", title=["Number", "of papers"]),
        color=alt.Color(
            "cat_sel:N", legend=None, scale=alt.Scale(scheme="tableau10")
        ),
        column=category_column,
    )
    jittered = circles.transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
    )

    stripplot = jittered.configure_facet(spacing=0)
    stripplot = stripplot.configure_view(stroke=None)
    stripplot = stripplot.configure_axis(labelFontSize=12, titleFontSize=12)
    return stripplot.properties(title=name, width=10, height=200)
def plot_targets(targets: pd.DataFrame) -> None:
    """Render one KDE per target column, stacked as row facets, in Streamlit.

    The frame is melted first; the melted table is also passed to
    ``tables.describe_table``.
    """
    long_targets = targets.melt()
    tables.describe_table(long_targets, description="targets")

    variable_row = alt.Row(
        "variable",
        title=None,
        header=alt.Header(labelAngle=0, labelAlign="left"),
    )
    grid = plot_kde().facet(data=long_targets, row=variable_row)
    grid = grid.configure_facet(spacing=5)
    grid = grid.resolve_scale(y="independent")
    st.altair_chart(grid.properties(bounds="flush"))
def plot_supervisor(states, gender, remote):
    """Return HTML for participant counts by work interference and supervisor.

    The module-level ``m_data`` is filtered to the given ``states``,
    ``gender`` and ``remote`` values; each ``supervisor`` answer becomes a
    column facet.
    """
    selected = m_data[m_data['state'].isin(states)]
    selected = selected[selected['Gender'].isin(gender)]
    selected = selected[selected['remote_work'].isin(remote)]

    supervisor_column = alt.Column(
        'supervisor',
        title='Have You Talked With Supervisor?',
        header=alt.Header(titleOrient='bottom', labelOrient='bottom'),
    )
    bars = alt.Chart(selected).mark_bar().encode(
        x=alt.X('work_interfere', axis=None),
        y=alt.Y('count()', title='Count of Participants'),
        tooltip='count()',
        color='work_interfere',
        column=supervisor_column,
    )
    return bars.properties(width=75).to_html()
def scatter_subplot(df, color='Category', color_sort_order=['Clothes', 'Accessory'], y_col='Price', color_scale='tableau10', price_scale=alt.Scale(), size=14, opacity=0.7, tooltip=None):
    """Return a jittered scatter plot with box plot and median rule per year.

    Parameters
    ----------
    df:
        DataFrame with at least ``y_col``, the ``color`` column and ``Year``.
    color:
        Column used for the nominal colour encoding.
    color_sort_order:
        Optional sort order for the colour scale; the default list is only
        read, never modified.
    y_col:
        Quantitative column plotted on the y axis.
    color_scale:
        Either an ``alt.Scale`` or the name of a colour scheme.
    price_scale:
        Scale applied to the y axis of the points and the box plot.
    size, opacity:
        Mark size and opacity of the circles.
    tooltip:
        Tooltip fields.  When None, defaults to 'Product', 'Price', the
        first column starting with 'Price-' and ``y_col`` (duplicates
        removed, order kept).

    Returns
    -------
    A chart faceted into one column per ``Year``.

    Raises
    ------
    IndexError
        If ``tooltip`` is None and no column name starts with 'Price-'.
    """
    if tooltip is None:
        # The adjusted-price column is only needed for the default tooltip,
        # so frames without one still work when a tooltip is supplied.
        adjusted_price_col = [
            c for c in df.columns if c.startswith('Price-')
        ][0]
        tooltip = list(
            dict.fromkeys(['Product', 'Price', adjusted_price_col, y_col]))

    if isinstance(color_scale, alt.Scale):
        color_kwargs = {"scale": color_scale}
    else:
        color_kwargs = {"scale": alt.Scale(scheme=color_scale)}
    if color_sort_order:
        color_kwargs['sort'] = color_sort_order

    chart = alt.Chart(df, width=50)

    points = chart.mark_circle(size=size, opacity=opacity).encode(
        x=alt.X(
            'jitter:Q',
            title=None,
            axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
            scale=alt.Scale(),
        ),
        y=alt.Y(f"{y_col}:Q", axis=alt.Axis(), scale=price_scale),
        color=alt.Color(f'{color}:N', **color_kwargs),
        tooltip=tooltip,
    ).transform_calculate(
        # Box-Muller style Gaussian jitter; the factor -1 (instead of the
        # textbook -2) gives a standard deviation of 1/sqrt(2).
        jitter='sqrt(-1*log(random()))*cos(2*PI*random())'
    )

    median_rule = chart.mark_rule(color='red', opacity=0.5, size=2).encode(
        y=alt.Y(f"median({y_col}):Q"))

    box = chart.mark_boxplot(
        outliers=False, opacity=0.5, color='#A0A0A0', size=25
    ).encode(y=alt.Y(f"{y_col}:Q", axis=alt.Axis(), scale=price_scale))

    # Layer order, bottom to top: box plot, median rule, points.
    result = box + (median_rule + points)

    return result.facet(column=alt.Column(
        'Year:O',
        header=alt.Header(
            labelAngle=-90,
            titleOrient='top',
            labelOrient='bottom',
            labelAlign='right',
            labelPadding=3,
        ),
    ))
def accum_global():
    """Return the JSON spec of cumulative area charts per workshop category.

    Plots ``g.accum``: ``cumsum`` over ``workshop_start``, coloured by
    ``variable``, with one column facet per ``workshop_category``.
    """
    category_column = alt.Column(
        'workshop_category',
        title=None,
        sort="descending",
        header=alt.Header(
            titleColor='red', labelColor='red', titleAnchor="end"
        ),
    )
    series_color = alt.Color(
        "variable",
        scale=alt.Scale(range=['#7dbbd2cc', '#bbc6cbe6']),
        legend=None,
    )

    areas = alt.Chart(g.accum).mark_area().encode(
        column=category_column,
        x=alt.X("workshop_start", title="Date"),
        y=alt.Y("cumsum:Q", title="Cumulative"),
        color=series_color,
        tooltip=['variable', 'cumsum:Q'],
    )
    areas = areas.properties(width=350)
    areas = areas.configure_axis(
        labelColor='#bbc6cbe6', titleColor='#bbc6cbe6'
    )
    return areas.to_json()
def bubble_chart(df, y, facet, tooltip):
    '''
    create bubble chart

    :param df: Pandas DataFrame to display
    :param y: column of DataFrame to use for bubble size (also used as the
        y position of each bubble)
    :param facet: column of DataFrame to create facet with
    :param tooltip: list of DataFrame columns to include in tooltip
    :return: altair bubble chart
    '''
    # The 400px budget is divided by the number of rows; an empty frame is
    # treated as one row so the division cannot fail.
    facet_height = 400 / max(len(df), 1)

    return alt.Chart(df).mark_circle().encode(
        x=alt.X('days', axis=alt.Axis(grid=True)),
        y=alt.Y(y, axis=alt.Axis(grid=False, labels=False), title=None),
        color=alt.value('#17becf'),
        row=alt.Row(facet, title=None, header=alt.Header(labelAngle=-45)),
        tooltip=tooltip,
        size=alt.Size(y, scale=alt.Scale(range=[100, 500])),
    ).properties(
        width=450, height=facet_height
    ).configure_facet(spacing=5).configure_view(stroke=None)
def generate_ridgeline_plot(data, attribute):
    '''Ridgeline plot: overlapping binned-count areas, one row per species.

    ``attribute`` is binned and counted per species; the chart title is
    looked up in the module-level ``metadata_description``.
    '''
    step = 40
    overlap = 1
    field = str(attribute)
    description = str(metadata_description[attribute])

    binned_counts = alt.Chart(data).transform_joinaggregate(
        mean_attribute=f"mean({field})", groupby=['species']
    ).transform_bin(
        ['bin_max', 'bin_min'], field
    ).transform_aggregate(
        value='count()',
        groupby=['species', 'mean_attribute', 'bin_min', 'bin_max'],
    ).transform_impute(
        impute='value',
        groupby=['species', 'mean_attribute'],
        key='bin_min',
        value=0,
    )

    ridges = binned_counts.mark_area(
        interpolate='monotone',
        fillOpacity=0.4,
        stroke='lightgray',
        strokeWidth=0.3,
    ).encode(
        x=alt.X('bin_min:Q', bin='binned', title=field),
        # A negative upper range bound lets areas overlap the row above.
        y=alt.Y(
            'value:Q',
            scale=alt.Scale(range=[step, -step * overlap]),
            axis=None,
        ),
        fill=alt.Fill(
            'mean_attribute:Q',
            legend=None,
            scale=alt.Scale(domain=[30, 5], scheme='redyellowblue'),
        ),
        row=alt.Row(
            'species:O',
            title='Species',
            header=alt.Header(labelAngle=0, labelAlign='right'),
        ),
    )

    graph = ridges.properties(
        bounds='flush',
        title=f'Comparison: {description}',
        height=100,
        width=700,
    )
    graph = graph.configure_facet(spacing=0)
    graph = graph.configure_view(stroke=None)
    return graph.configure_title(anchor='end')
def visualize_emb(vis_dict):
    """Render stacked area rows of embedding columns in Streamlit.

    ``vis_dict['dict']`` is turned into a DataFrame and the columns named in
    ``vis_dict['c_names']`` are folded into (``embedding``, ``lv``) pairs;
    every embedding is drawn in its own row against ``x``.
    """
    columns_data = vis_dict['dict']
    c_names = vis_dict['c_names']
    emb_vis_data = pd.DataFrame(columns_data)
    step = 20

    folded = alt.Chart(emb_vis_data).transform_fold(
        c_names, as_=['embedding', 'lv']
    )
    areas = folded.mark_area(
        interpolate='monotone',
        fillOpacity=0.8,
        stroke='lightgray',
        strokeWidth=0.2,
    ).encode(
        alt.X(
            'x:Q',
            title=None,
            scale=alt.Scale(domain=[0, 512], range=[0, 1500]),
        ),
        alt.Y('lv:Q', title="", scale=alt.Scale(rangeStep=40), axis=None),
        alt.Fill(
            'embedding:N',
            legend=None,
            scale=alt.Scale(scheme='redyellowblue'),
        ),
        row=alt.Row(
            'embedding:N', title=None, header=alt.Header(labelAngle=360)
        ),
    )

    emb_chart = areas.properties(
        bounds='flush', title='Вектор статьи', height=step, width=1200
    )
    emb_chart = emb_chart.configure_facet(spacing=0)
    emb_chart = emb_chart.configure_view(stroke=None)
    emb_chart = emb_chart.configure_title(anchor='middle')
    st.altair_chart(emb_chart, width=-1)
def draw_availability90_categories(df, upper_category, upper_type, upper_rename):
    """Show linked availability violins and a price/review scatter plot.

    The upper chart draws one density violin of ``availability_90`` per
    ``upper_category`` value and is filtered by an interval brush drawn on
    the lower scatter plot (price below 500 against review score).  Both
    charts are written to Streamlit, stacked vertically.
    """
    brush = alt.selection_interval(encodings=["x", "y"])

    densities = alt.Chart(df).transform_filter(brush).transform_density(
        'availability_90',
        as_=['availability_90', 'density'],
        groupby=[upper_category],
    )
    category_column = alt.Column(
        upper_category,
        type=upper_type,
        header=alt.Header(
            titleOrient='bottom',
            labelOrient='bottom',
            labelPadding=0,
            title=upper_rename,
        ),
    )
    # Centre-stacked horizontal areas give the mirrored violin outline.
    field_availability_chart = densities.mark_area(
        orient='horizontal'
    ).encode(
        y=alt.Y('availability_90:Q', title='Availability in 90 Days'),
        color=alt.Color(
            upper_category,
            type=upper_type,
            legend=alt.Legend(title=upper_rename),
        ),
        x=alt.X(
            'density:Q',
            stack='center',
            impute=None,
            title=None,
            axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
        ),
        column=category_column,
        tooltip=[alt.Tooltip('availability_90:Q')],
    ).properties(width=200).interactive()

    affordable = df[df['price'] < 500]
    scatterplot = alt.Chart(affordable).mark_circle(size=100).encode(
        alt.X("price"),
        alt.Y(
            "review_scores_rating",
            scale=alt.Scale(zero=False),
            title='review score',
        ),
        tooltip=[
            alt.Tooltip('price', format='$.2f'),
            alt.Tooltip('review_scores_rating', title='Review Score'),
        ],
    ).add_selection(brush)

    st.write(
        "**Brush through the lower scatterplot to filter the upper chart by review score and price.**"
    )
    st.write(field_availability_chart & scatterplot)
def plot_state_bar(states, gender, remote):
    """Return HTML for participant counts per state, faceted by treatment.

    The module-level ``m_data`` is filtered to the given ``states``,
    ``gender`` and ``remote`` values.  Clicking a state in the legend keeps
    its bars at opacity 0.9 and fades the others to 0.1.
    """
    click = alt.selection_multi(fields=['state'], bind='legend')

    selected = m_data[m_data['state'].isin(states)]
    selected = selected[selected['Gender'].isin(gender)]
    selected = selected[selected['remote_work'].isin(remote)]

    treatment_column = alt.Column(
        'treatment',
        title='Sought Treatment or Not',
        header=alt.Header(titleOrient='bottom', labelOrient='bottom'),
    )
    bars = alt.Chart(selected).mark_bar().encode(
        x=alt.X('state', axis=None),
        y=alt.Y('count()', title='Count of Participants'),
        tooltip='count()',
        color='state',
        column=treatment_column,
        opacity=alt.condition(click, alt.value(0.9), alt.value(0.1)),
    )
    bars = bars.add_selection(click).properties(width=100)
    return bars.to_html()
def plot_interfere(states, gender, remote):
    """Return HTML for participant counts per state, by work interference.

    The module-level ``m_data`` is filtered to the given ``states``,
    ``gender`` and ``remote`` values.  Clicking a state in the legend keeps
    its bars at opacity 0.9 and fades the others to 0.2.
    """
    click = alt.selection_multi(fields=['state'], bind='legend')

    selected = m_data[m_data['state'].isin(states)]
    selected = selected[selected['Gender'].isin(gender)]
    selected = selected[selected['remote_work'].isin(remote)]

    interfere_column = alt.Column(
        'work_interfere',
        title='Does Your Mental Illness Interfere With Work?',
        header=alt.Header(titleOrient='bottom', labelOrient='bottom'),
    )
    bars = alt.Chart(selected).mark_bar().encode(
        x=alt.X('state', axis=None),
        y=alt.Y('count()', title='Count of Participants'),
        tooltip='count()',
        color='state',
        column=interfere_column,
        opacity=alt.condition(click, alt.value(0.9), alt.value(0.2)),
    )
    bars = bars.add_selection(click).properties(width=75)
    return bars.to_html()