def create_graph():
    """Build two linked co-occurrence graphs (authors and papers) with Altair.

    Graph 1 connects authors that share a paper; graph 2 connects papers that
    share an author. Both charts share one ``ID_author`` selection, so clicking
    an author highlights that author's papers in the second chart.

    Returns
    -------
    str
        Vega-Lite JSON spec produced by ``Chart.to_json()``.
    """
    # Lift Altair's default max-row limit so the full graph can be plotted.
    alt.data_transformers.disable_max_rows()

    # Query the database: authors, papers and the author<->paper join table.
    autores = queryDB('author', ['ID_author', 'author'])
    artigos = queryDB('paper', ['ID_paper', 'paper'])
    author_paper = queryDB('author_paper', ['ID_paper', 'ID_author'])
    autores['ID_author'] = autores['ID_author'].astype(str)
    artigos['ID_paper'] = artigos['ID_paper'].astype(str)

    ## Graph 1 - Authors
    print('Preparando grafo dos autores...')
    graph = nx.Graph()
    # DataFrame with one row per paper: ID_paper -> [list of author IDs].
    group = pd.DataFrame(author_paper.groupby('ID_paper')['ID_author'].apply(list))

    # Add one edge per pair of co-authors. `k` starts at `i` (not i+1), so a
    # single-author paper yields a self-loop, which keeps that author as a
    # node in the graph.
    for _, row in group.iterrows():
        coauthors = row['ID_author']
        for i in range(len(coauthors)):
            for k in range(i, len(coauthors)):
                graph.add_edge(coauthors[i], coauthors[k])

    # Force-directed layout.
    pos = nx.spring_layout(graph, k=0.2, iterations=50, weight=0.1,
                           center=(0.5, 0.5))

    # Collect nodes; join in author names and their paper IDs.
    nodes = to_pandas_nodes(graph, pos)
    nodes.reset_index(inplace=True)
    nodes.rename(columns={'index': 'ID_author'}, inplace=True)
    nodes = pd.merge(nodes, autores, on='ID_author')       # author names
    nodes = pd.merge(nodes, author_paper, on='ID_author')  # ID_paper

    # Collect edges.
    edges = to_pandas_edges(graph, pos)

    # Chart 1
    print('Criando interatividade com o Altair (autores) ...')
    # Single-click selection on an author; shared with chart 2 below.
    selector = alt.selection_single(empty='all', fields=['ID_author'])

    points = alt.Chart(nodes).add_selection(selector).mark_point(
        filled=True, size=90
    ).encode(
        alt.X('x', axis=alt.Axis(title='')),
        alt.Y('y', axis=alt.Axis(title='')),
        tooltip='author',
        opacity=alt.condition(selector, alt.value(0.95), alt.value(0.4),
                              legend=None),
        color=alt.condition(selector, 'ID_author', alt.value('lightgray'),
                            legend=None)
    ).properties(
        selection=selector
    ).transform_filter(selector)

    # Gray background layer so unselected nodes stay visible during the
    # selection transition.
    bk = alt.Chart(nodes).mark_point(
        color='lightgray', filled=True, size=90
    ).encode(
        alt.X('x', axis=alt.Axis(title='')),
        alt.Y('y', axis=alt.Axis(title='')),
        tooltip='author',
        opacity=alt.value(0.4),
    )

    lines = alt.Chart(edges).mark_line(color='salmon').encode(
        alt.X('x', axis=alt.Axis(title='')),
        alt.Y('y', axis=alt.Axis(title='')),
        detail='edge',
        opacity=alt.value(0.15)
    )

    chart = alt.LayerChart(layer=(lines, bk + points)).properties(
        height=350, width=450
    ).interactive()

    ## Graph 2 - Papers
    print('Preparando grafo dos artigos...')
    graph1 = nx.Graph()
    group1 = pd.DataFrame(author_paper.groupby('ID_author')['ID_paper'].apply(list))

    # Same pairing logic as above: papers sharing an author become connected,
    # and a paper with a lone author keeps itself via a self-loop.
    for _, row in group1.iterrows():
        papers = row['ID_paper']
        for i in range(len(papers)):
            for k in range(i, len(papers)):
                graph1.add_edge(papers[i], papers[k])

    pos1 = nx.spring_layout(graph1, k=0.2, iterations=50, weight=0.1,
                            center=(0.5, 0.5))

    # Collect nodes; join in paper titles and their author IDs.
    nodes1 = to_pandas_nodes(graph1, pos1)
    nodes1.reset_index(inplace=True)
    nodes1.rename(columns={'index': 'ID_paper'}, inplace=True)
    nodes1 = pd.merge(nodes1, artigos, on='ID_paper')       # paper titles
    nodes1 = pd.merge(nodes1, author_paper, on='ID_paper')  # ID_author

    edges1 = to_pandas_edges(graph1, pos1)

    # Chart 2 — reuses `selector` (fields=['ID_author']) so selecting an
    # author in chart 1 highlights that author's papers here.
    print('Criando interatividade com o Altair (artigos)...')
    points1 = alt.Chart(nodes1).add_selection(selector).mark_point(
        filled=True, size=90
    ).encode(
        alt.X('x', axis=alt.Axis(title='')),
        alt.Y('y', axis=alt.Axis(title='')),
        tooltip='paper',
        opacity=alt.condition(selector, alt.value(0.95), alt.value(0.4),
                              legend=None),
        color=alt.condition(selector, 'ID_author', alt.value('lightgray'),
                            legend=None)
    ).transform_filter(selector)

    # Background layer for the selection transition effect.
    bk1 = alt.Chart(nodes1).mark_point(
        color='lightgray', filled=True, size=90
    ).encode(
        alt.X('x', axis=alt.Axis(title='')),
        alt.Y('y', axis=alt.Axis(title='')),
        tooltip='paper',
        opacity=alt.value(0.4),
    )

    lines1 = alt.Chart(edges1).mark_line(color='lightblue').encode(
        alt.X('x', axis=alt.Axis(title='')),
        alt.Y('y', axis=alt.Axis(title='')),
        detail='edge',
        opacity=alt.value(0.2)
    )

    chart1 = alt.LayerChart(layer=(lines1, bk1 + points1)).properties(
        height=350, width=450
    ).interactive()

    ### Concatenate charts 1 and 2 horizontally.
    horiz_chart = alt.hconcat(chart, chart1).configure_axis(
        ticks=False,
        grid=False,
        domain=False,
        labels=False
    ).configure_view(
        strokeWidth=0
    )
    return horiz_chart.to_json()
def get_altair_chart(df, x_col, y_cols='ALL', cat_col=None, sel_cols=None,
                     sliders=None, ns_opacity=1.0, chart_title='',
                     scheme='lightmulti', mark_type='line', sort_values=False,
                     y_index=-1, stack=None):
    """Build a configurable interactive Altair chart from a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data. When ``cat_col`` is given, a companion bar chart is
        drawn from a ``'total'`` column — assumes df has one (TODO confirm).
    x_col : str
        Column for the X axis.
    y_cols : list or 'ALL'
        Y columns; 'ALL' takes every column after x/cat/sel columns.
    cat_col : str, optional
        Column used for color; enables a legend-bound highlight selection.
    sel_cols : list, optional
        Columns exposed as dropdown filters.
    sliders : dict, optional
        ``{'min': col_or_[col, init], 'max': ...}`` range-slider filters.
    ns_opacity : float
        Opacity for non-selected categories.
    chart_title : str
        Currently unused — kept for interface compatibility.
    scheme, mark_type, sort_values, y_index, stack
        Styling / layout options.

    Returns
    -------
    altair.Chart
    """
    # Pick the base mark.
    if mark_type == 'bar':
        chart = alt.Chart(df).mark_bar()
    elif mark_type == 'area':
        chart = alt.Chart(df).mark_area()
    else:
        chart = alt.Chart(df).mark_line(point=True, strokeWidth=2)

    x_col_ed = x_col
    if sort_values:
        # Sort the nominal X axis by the Y value.
        x_col_ed = alt.X(f'{x_col}:N', sort='y')
    chart = chart.encode(
        x=x_col_ed,
        tooltip=list(df.columns),
    ).properties(width=600, height=400)  #.interactive()

    if sliders:
        for key, value in sliders.items():
            # 'min' sliders keep rows >= slider value; 'max' keeps rows <=.
            if key == 'min':
                comparison = '>='
                agg_fn = min
            elif key == 'max':
                comparison = '<='
                agg_fn = max
            else:
                print(
                    f"Atenção: a chave '{key}' não é válida para a variável sliders. Usar apenas 'min' ou 'max'"
                )
                continue
            # Value is either a column name or [column, initial_value].
            if isinstance(value, list):
                slider_col = value[0]
                if len(value) > 1:
                    init_value = value[1]
                else:
                    init_value = agg_fn(df[slider_col])
            else:
                slider_col = value
                init_value = agg_fn(df[slider_col])
            if slider_col in df.columns:
                slider = alt.binding_range(min=min(df[slider_col]),
                                           max=max(df[slider_col]),
                                           step=1)
                slider_selector = alt.selection_single(
                    bind=slider,
                    name=key,
                    fields=[slider_col],
                    init={slider_col: init_value})
                chart = chart.add_selection(slider_selector).transform_filter(
                    f'datum.{slider_col} {comparison} {key}.{slider_col}[0]')

    if y_cols == 'ALL':
        # Everything after the x column (plus cat/sel columns) is a Y column.
        index = 1
        if cat_col:
            index += 1
        if sel_cols:
            index += len(sel_cols)
        y_cols = df.columns[index:].to_list()

    if len(y_cols) > 1:
        # Multiple Y columns: fold them and expose a dropdown to pick one.
        columns = y_cols
        y_col_name = 'Y_col'
        select_box = alt.binding_select(options=columns, name=y_col_name)
        sel = alt.selection_single(fields=[y_col_name],
                                   bind=select_box,
                                   init={y_col_name: y_cols[y_index]})
        chart = chart.transform_fold(columns,
                                     as_=[y_col_name,
                                          'Valor']).transform_filter(sel)
        if stack == 'normalize':
            chart = chart.encode(y=alt.Y("Valor:Q", stack="normalize"), )
        elif stack == 'sum':
            chart = chart.encode(y='sum(Valor):Q', )
        else:
            chart = chart.encode(y='Valor:Q', )
        chart = chart.add_selection(sel)
    else:
        y_col = y_cols[0]
        chart = chart.encode(y=y_col)

    # TODO: adicionar filtro de range
    # lower = chart.properties(
    #     height=60
    # ).add_selection(brush)
    # chart = chart & lower

    if cat_col:
        base_cat = cat_col
        chart = chart.encode(
            color=alt.Color(base_cat, scale=alt.Scale(scheme=scheme)),  #,legend=None),
        )
        # Legend-bound multi-selection: clicking a legend entry dims the rest.
        sel_base = alt.selection_multi(empty='all',
                                       fields=[base_cat],
                                       bind='legend')
        chart = chart.add_selection(sel_base).encode(opacity=alt.condition(
            sel_base, alt.value(1.0), alt.value(ns_opacity)))
        # Companion totals bar chart, linked to the same legend selection.
        bar = alt.Chart(df).mark_bar().encode(
            y=alt.Y(f'{base_cat}:O', title=None),
            x='total',
            # tooltip='total',
            color=alt.condition(
                sel_base,
                alt.Color(f'{base_cat}:N', scale=alt.Scale(scheme=scheme)),
                alt.ColorValue("lightgrey"),
                legend=None)).add_selection(sel_base).properties(width=100,
                                                                 height=400)
        chart = alt.concat(chart, bar)
        # chart = chart & lower TODO: adicionar fltro de range

    select_cols = sel_cols
    if select_cols:
        # One dropdown per selector column, initialized to its first value.
        options_lists = [
            df[cat].dropna().astype(str).sort_values().unique().tolist()
            for cat in select_cols
        ]
        selection = alt.selection_single(
            name='Selecione',
            fields=select_cols,
            init={
                cat: options_lists[i][0]
                for i, cat in enumerate(select_cols)
            },
            bind={
                cat: alt.binding_select(options=options_lists[i])
                for i, cat in enumerate(select_cols)
            })
        chart = chart.add_selection(selection).transform_filter(selection)
    return chart
# Create a 50-element time-series for each object timeseries = pd.DataFrame(np.random.randn(n_times, n_objects).cumsum(0), columns=locations['id'], index=pd.RangeIndex(0, n_times, name='time')) # Melt the wide-form timeseries into a long-form view timeseries = timeseries.reset_index().melt('time') # Merge the (x, y) metadata into the long-form view timeseries['id'] = timeseries['id'].astype(int) # make merge not complain data = pd.merge(timeseries, locations, on='id') # Data is prepared, now make a chart selector = alt.selection_single(empty='all', fields=['id']) base = alt.Chart(data).properties( width=250, height=250 ).add_selection(selector) points = base.mark_point(filled=True, size=200).encode( x='mean(x)', y='mean(y)', color=alt.condition(selector, 'id:O', alt.value('lightgray'), legend=None), ).interactive() timeseries = base.mark_line().encode( x='time', y=alt.Y('value', scale=alt.Scale(domain=(-15, 15))),
def get_histograms(df_scores_in, selected_score, selected_score_desc,
                   selected_score_axis):
    """Build a linked pair of Altair charts for one score.

    Top: a histogram of the score across Landkreise for the hovered date,
    with median and (optionally) 100% reference rules. Bottom: the median
    time series, where hovering a date drives the histogram via a selection
    on ``date_id``.

    Parameters
    ----------
    df_scores_in : pandas.DataFrame
        Must contain 'date', 'name' and the ``selected_score`` column.
    selected_score : str
        Score column name (e.g. "webcam_score", "tomtom_score").
    selected_score_desc : str
        Human-readable title text.
    selected_score_axis : str
        Axis label text.

    Returns
    -------
    altair.VConcatChart
        Histogram chart stacked above the median-timeline chart.
    """
    # prepare dataframe
    df_scores = df_scores_in.copy()
    df_scores = df_scores[["date", "name",
                           selected_score]]  # throw other scores away
    df_scores = df_scores.dropna(axis=0, how="any")  # remove rows with NaN
    df_scores = df_scores.groupby(["name", "date"]).mean().reset_index(
    )  # daily average in case of multiple datapoints per day
    df_scores["date"] = pd.to_datetime(
        df_scores["date"])  # make sure date column is datetime
    df_scores["date_str"] = df_scores["date"].apply(
        lambda x: x.strftime("%Y-%m-%d"))  # date string column

    # Use an integer date_id for lookup purposes (the selection below keys
    # on it, since Vega selections handle ints more easily than dates).
    dates = sorted(set(df_scores["date_str"]))
    date2idx = {d: i for i, d in enumerate(dates)}

    def date2id(x):
        # Unknown dates map to NaN instead of raising.
        return date2idx.get(x, np.nan)

    df_scores["date_id"] = df_scores["date_str"].apply(
        lambda x: date2id(str(x)))

    # Median dataframe, one row per date.
    # NOTE(review): groupby().median() over the string columns relies on
    # pandas silently dropping non-numeric columns — verify on pandas >= 2.0.
    df_median = df_scores.groupby("date").median().reset_index()
    maxval = max(df_scores[selected_score])
    maxval = 10 * np.ceil(maxval / 10)  # round up to the next multiple of 10

    # plot title
    title = {
        "text": ["", "{}".format(selected_score_desc)
                 ],  # use two lines as hack so the umlauts at Ö are not cut off
        "subtitle": "EveryoneCounts.de",
        "color": "black",
        "subtitleColor": "lightgray",
        "subtitleFontSize": 12,
        "subtitleFontWeight": "normal",
        "fontSize": 15,
        "lineHeight": 5,
    }

    # Special treatment for webcam score b/c it uses absolute values;
    # the other scores are percentages relative to a 100% baseline.
    if selected_score == "webcam_score":
        scale = alt.Scale(domain=(1.05 * maxval, 0), scheme="blues")
        score_bin = alt.Bin(extent=[0, maxval], step=maxval / 20)
    elif selected_score == "tomtom_score":
        scale = alt.Scale(domain=(200, 0), scheme="redyellowgreen")
        score_bin = alt.Bin(extent=[0, max(50, maxval)],
                            step=max(50, maxval) / 20)
    else:
        scale = alt.Scale(domain=(200, 0), scheme="redyellowgreen")
        score_bin = alt.Bin(extent=[0, max(200, maxval)],
                            step=max(200, maxval) / 20)

    # Here comes the magic: a selector! Hover (nearest) picks a date;
    # initialized to the second-to-last date.
    selector = alt.selection_single(empty="none",
                                    fields=['date_id'],
                                    on='mouseover',
                                    nearest=True,
                                    init={'date_id': len(dates) - 2})

    #--- Altair charts from here on ---#
    # Histogram chart, filtered to the selected date.
    chart = alt.Chart(df_scores).mark_bar(
        #clip=True
    ).encode(
        alt.X(selected_score + ":Q", title=selected_score_axis, bin=score_bin),
        alt.Y(
            'count():Q',
            title="Anzahl Landkreise",
        ),
        color=alt.Color(
            selected_score + ":Q",
            scale=scale,
            legend=None,
        ),
    ).transform_filter(selector).properties(width='container',
                                            height=300,
                                            title=title)

    # Rule at 100%
    rule100 = alt.Chart(df_scores).mark_rule(
        color='lightgray', size=3).encode(x="a:Q").transform_calculate(a="100")

    # Rule for the median of the selected date.
    rulemedian = alt.Chart(df_median).mark_rule(color='#F63366').encode(
        x=selected_score + ":Q",
        size=alt.value(3),
        tooltip=[alt.Tooltip(selected_score + ':Q',
                             title="Median")]).transform_filter(selector)

    # Median time-series plot (bottom chart).
    median_points = alt.Chart(df_median).mark_point(
        filled=True,
        size=150,
        color="gray",
    ).encode(
        alt.X("date:T", axis=alt.Axis(title='Datum', format=("%d %b"))),
        alt.Y(selected_score + ':Q',
              title="Median " + selected_score_axis)).properties(
                  width='container',
                  height=180,
                  title={
                      "text": "Wähle ein Datum:",
                      "color": "black",
                      "fontWeight": "normal",
                      "fontSize": 12
                  })

    # Invisible points that carry the hover selection and the tooltip.
    selectorchart = alt.Chart(df_median).mark_point().encode(
        x='date:T',
        opacity=alt.value(0),
        tooltip=[
            alt.Tooltip("date:T", title="Datum", format=("%A %d %B")),
            alt.Tooltip(selected_score + ":Q", title="Median")
        ]).add_selection(selector)

    median_line = alt.Chart(df_median).mark_line(
        point=False, color="gray", size=1).encode(
            alt.X("date:T"),
            alt.Y(selected_score + ':Q'),
        ).properties(width='container', )

    # Large highlighted point for the selected date.
    median_selected = alt.Chart(df_median).mark_point(
        filled=True, size=400, color="#F63366", opacity=0.7).encode(
            alt.X("date:T"),
            alt.Y(selected_score + ':Q'),
        ).properties(width='container', ).transform_filter(selector)

    # Vertical / horizontal guide rules through the selected point.
    median_selected_rule = alt.Chart(df_median).mark_rule(
        point=False, color="gray", size=1,
        opacity=1).encode(alt.X("date:T"), ).properties(
            width='container', ).transform_filter(selector)
    median_selected_rule2 = alt.Chart(df_median).mark_rule(
        point=False, color="#F63366", size=1,
        opacity=1).encode(alt.Y(selected_score + ":Q"), ).properties(
            width='container', ).transform_filter(selector)

    # Absolute-valued scores get no 100% reference rule.
    if selected_score in ["airquality_score", "webcam_score", "tomtom_score"]:
        chart_top = chart + rulemedian
    else:
        chart_top = rule100 + chart + rulemedian
    chart_bottom = median_selected_rule + median_line + median_points + median_selected + median_selected_rule2 + selectorchart
    return chart_top & chart_bottom
plt.title("Wealth Distribution by Income") # %% # alternative way to plot equilibrium import altair as alt df = eq.as_df() spec = alt.Chart(df).mark_line().encode(x='a', y='μ', color='i_m:N') spec # %% # alternative way to plot equilibrium (with some interactivity) # TODO: function to generate it automatically. import altair as alt single = alt.selection_single(on='mouseover', nearest=True) df = eq.as_df() ch = alt.Chart(df) spec = ch.properties(title='Distribution', height=100).mark_line().encode( x='a', y='μ', color=alt.condition(single, 'i_m:N', alt.value('lightgray')) ).add_selection(single) + ch.mark_line(color='black').encode( x='a', y='sum(μ)') & ch.properties( title='Decision Rule', height=100).mark_line().encode( x='a', y='i', color=alt.condition(single, 'i_m:N', alt.value('lightgray'))).add_selection(single) # %% # Resulting object can be saved to a file. (try to open this file in jupyterlab)
my_theme = alt.themes.get()() # Get current theme as dict. my_theme.setdefault('encoding', {}).setdefault('color', {})['scale'] = { 'scheme': 'bluepurple', } alt.themes.register('my_theme', lambda: my_theme) alt.themes.enable('my_theme') # In[90]: start = df_comp.marg_imp.min() end = df_comp.marg_imp.max() # In[91]: selector = alt.selection_single(on='mouseover', nearest=True, empty='all', fields=['base_seg_id']) # In[92]: base = alt.Chart(df_comp).mark_point(filled=True).encode( alt.X('Coef_value'), alt.Y('cr'), size=alt.Size('impressions', scale=alt.Scale(domain=[100, 100000])), color=alt.Color('marg_imp', scale=alt.Scale(scheme='bluepurple', domain=[start, end])), tooltip=[ alt.Tooltip('base_seg_id'), alt.Tooltip('Coef_value'), alt.Tooltip('marg_imp') ],
def tl_summary(df, values, time, bars, col, text, title='', bars_w=810,
               bars_h=200, bars_stack='zero', timeline_w=450, timeline_h=200,
               slope_avg='Average', slope_w=300, slope_h=200, slope_y_pos=10,
               palette='tableau10'):
    '''
    Plots 3 charts: bars, timeline and slopegraph

    Parameters
    ----------
    df : pandas.DataFrame
    values : str
        Name of the column used for values.
    time : str
        Name of the column used for time values.
    bars : str
        Name of the column used to plot as X-axis on the bars.
    col : str
        Name of the column used for colors.
    text : str
        Name of the column used to show text on slopegraph.
    title : str
        Title of the plot.
    bars_w : int
        Bars plot width.
    bars_h : int
        Bars plot height.
    timeline_w : int
        Timeline plot width.
    timeline_h : int
        Timeline plot height.
    slope_avg : str
        Title for the avg measures on slopegraph.
    slope_w : int
        Slopegraph plot width.
    slope_h : int
        Slopegraph plot height.
    slope_y_pos : int
        Slopegraph titles position.
    palette : str
        Check https://vega.github.io/vega/docs/schemes/#reference

    Returns
    -------
    altair.Chart
    '''
    df = df.copy()
    # Tag raw rows as 'measures'; per-(col, time) means appended below are
    # tagged 'averages' so the charts can tell the two groups apart.
    df['slope_x'] = 'measures'
    df_avg = df.groupby([col, time]).mean().reset_index()
    df_avg[bars] = slope_avg
    df_avg['slope_x'] = 'averages'
    df = pd.concat([df, df_avg], ignore_index=True, sort=True)
    df[values] = df[values].round(2)
    df['slope_text'] = df[values].astype(str) + ' ' + df[col]
    max_time = df[time].max()
    # Bars ordered by total value at the latest time step; the synthetic
    # average category is excluded from the X-axis domain.
    orders = (df[df[time] == max_time].groupby(bars)[values].sum().sort_values(
        ascending=False).index.tolist())
    orders.remove(slope_avg)
    # Hovering a bar filters the timeline and slopegraph.
    filter_in = alt.selection_single(fields=[bars], on='mouseover',
                                     empty='none')
    base = alt.Chart(df)
    barsplot = base.mark_bar().encode(
        alt.X(f'{bars}:N', title=None, scale=alt.Scale(domain=orders)),
        alt.Y(f'{values}:Q', title=text, stack=bars_stack),
        alt.Color(col,
                  legend=alt.Legend(orient='bottom-left', title=None),
                  scale=alt.Scale(scheme=palette)),
        opacity=alt.condition(
            filter_in, alt.value('1'), alt.value('0.6'))).transform_filter({
                # NOTE(review): max_time is interpolated unquoted into the
                # Vega expression — assumes `time` holds numeric values;
                # confirm with callers.
                'and': [f'datum.{time} == {max_time}', 'datum.slope_x == "measures"']
            }).properties(title=title,
                          selection=filter_in,
                          width=bars_w,
                          height=bars_h)
    timeline_base = base.mark_line().encode(
        alt.X(f'{time}:O'),
        alt.Y(f'{values}:Q',
              title=text,
              scale=alt.Scale(domain=[df[values].min(), df[values].max()])),
        alt.Color(col, legend=None)).properties(width=timeline_w,
                                                height=timeline_h)
    timeline = timeline_base.transform_filter(filter_in)
    # Overlay circles on the filtered line to mark individual points.
    timeline += timeline.mark_circle(size=25)
    # Dashed, faded reference line showing the average series.
    timeline_avg = timeline_base.mark_line(
        strokeDash=[4, 2],
        opacity=0.45).transform_filter(f'datum.{bars} == {slope_avg!r}')
    slope = _build_slope(df, values, time, bars, col, text, filter_in,
                         slope_y_pos, slope_w, slope_h)
    # Layout: bars on top; timeline (avg + filtered) beside the slopegraph.
    chart = barsplot & ((timeline_avg + timeline) | slope)
    return chart
def main():
    """Streamlit app exploring acoustic features of David Bowie's catalog.

    Sections: intro text, feature glossary, a per-song scatter chart with a
    year-range slider, an album-vs-decade bar-chart comparison, and a (stub)
    feature-based search section.
    """
    # connection config
    db_conn = get_connection('./billboard-200.db')
    all_data = get_data(db_conn, 'acoustic_features')

    # Paragraph - Intro
    st.title(
        "Explore the acoustic and meta features of albums and songs by David Bowie"
    )
    st.header("Introduction")
    st.write(
        "This 3-week project is for the Interactive Data Science - (Spring 2021) course under Adam Perer and Hendrik Strobelt, created by Vivian Young and Carol Ho. After navigating the Acoustic and meta-features of albums and songs data from Spotify, with 340,000 rows containing acoustic data for tracks from Billboard 200 albums from 1/5/1963 to 1/19/2019. We're intrigued by the feature label on each piece - the danceability, the energy, the beats, and the valence. To better explore the feature label's trend and distribution, we decided on David Bowie's work. They are well-known for their diverse music styles, and their creation has been influential since the 60s till now."
    )
    st.write(
        "The project consists of three parts; the first is the charts that allow the user to read each song's features and how albums distribute these features. The second part is a comparison of the albums' features with the overall music features by decade. With the holistic understanding of Bowie's work, the last part is an interactive search function that allows the users to search for their music by features."
    )
    st.write(
        " The analysis results aim to provide a different view of interpreting the albums and songs. Moreover, besides searching for particular songs or albums, how might we help users find the pieces that better fit the context and mood?"
    )
    st.write(
        "The original dataset: https://components.one/datasets/billboard-200/")

    # checkbox - original dataset
    agree = st.checkbox('show original data.(David Bowie)')
    if agree:
        st.text(
            'original data set - accoustic features of songs of David Bowie from 1969-2018'
        )
        st.dataframe(all_data)
        st.markdown(
            "```SELECT * FROM EMP JOIN DEPT ON EMP.DEPTNO = DEPT.DEPTNO;```")

    # Paragraph - Intro to Features
    st.header("Intro to Features")
    st.write(
        "Spotify labeled the songs with features to maximize the recommendation result. We picked the below features that are more relevant to the use-case of a music listener."
    )
    st.markdown(
        ":point_right:Danceability: Describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity."
    )
    st.markdown(
        ":point_right:Energy: Represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale."
    )
    st.markdown(
        ":point_right:Instrumentalness: Predicts whether a track contains no vocals. “Ooh” and “aah” sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly “vocal”."
    )
    st.markdown(
        ":point_right:Tempo: The overall estimated tempo of a track in beats per minute (BPM). In musical terminology, tempo is the speed or pace of a given piece, and derives directly from the average beat duration."
    )
    st.markdown(
        ":point_right:Valence: Describes the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry)."
    )

    # select - feature
    option = st.selectbox('What feature are you interested in dicover?',
                          ('danceability', 'energy', 'instrumentalness'))

    # connection config
    all_dacade_avg = get_all_decade_avg(db_conn, option)
    bowie_data = get_bowie_data(db_conn, option)

    # Paragraph - Chart 1
    st.header("Scatter Chart-David Bowie's albums")
    st.subheader(
        ":musical_note: How the selected feature shapes the distribution of songs by album?"
    )
    st.markdown(
        "Instruction: The slider allows you to zoom in albums by issued year, and clicking on the valence allows you to see the distribution of songs, from high valence(Happy) to low valence(Sad)."
    )

    # slider - year (inclusive range)
    start_year = st.slider("Show me the albums within these issued year!",
                           1969, 2018, (1969, 2000))
    # BUGFIX: use <= for the lower bound as well — the original strict `<`
    # silently excluded albums issued in the selected start year (e.g. 1969,
    # the slider minimum/default), while the upper bound was inclusive.
    filtered_data1 = bowie_data[start_year[0] <= bowie_data['date'].dt.year]
    filtered_data2 = filtered_data1[
        filtered_data1['date'].dt.year <= start_year[1]]

    # Legend-bound selection: clicking a valence level dims other points.
    select_scatter = alt.selection_multi(fields=['valence'], bind='legend')

    # color palette for scatter chart
    range_ = [
        '#D64550', '#EE8189', '#FC8B4A', '#F7B801', '#B9F18C', '#71DA1B',
        '#439A86', '#00BECC', '#7678ED', '#3D348B'
    ]

    # chart - scatter
    scatter = alt.Chart(filtered_data2).mark_circle().encode(
        alt.X('album',
              scale=alt.Scale(zero=True),
              sort={
                  "field": "date",
                  "order": "ascending"
              },
              title="Albums order by Issued Date"),
        alt.Y(option, scale=alt.Scale(zero=True), title=option),
        alt.Color('valence:O', sort='descending',
                  scale=alt.Scale(range=range_)),
        tooltip=['album', 'song', 'date', option, 'valence', 'tempo'],
        size=alt.Size('tempo',
                      scale=alt.Scale(domain=[0, 100], range=[1, 200]),
                      legend=alt.Legend(values=[50, 100, 150, 200])),
        opacity=alt.condition(select_scatter, alt.value(1),
                              alt.value(0.1))).properties(
                                  width=1200,
                                  height=900,
                              ).add_selection(select_scatter)
    st.write(scatter)

    # checkbox - bowie's album
    agree = st.checkbox('show original dataset.', key='album')
    if agree:
        st.text(
            'original data set - accoustic features of songs from 1969-2018')
        st.dataframe(bowie_data)

    # Paragraph - Chart 2
    st.header("Bar Chart-David Bowie's albums with average features")
    st.subheader(
        ":musical_note: How is the feature of the album different from the songs in that decade?"
    )
    st.markdown(
        "Instruction: Click on the checkbox to compare with the songs at that decade. Click on the bar for highlight."
    )

    # chart - bar: per-album average, click to highlight an album
    selector = alt.selection_single(empty='all', fields=['album'])
    bar_album = alt.Chart(all_dacade_avg).mark_bar(
        color='#1FC3AA', opacity=0.5, thickness=10).encode(
            alt.X('album',
                  sort={
                      "field": "date",
                      "order": "ascending"
                  },
                  title="(B)Albums order by Issued Date"),
            alt.Y('avg_feature',
                  scale=alt.Scale(zero=False),
                  title='(B)Average_' + option + '_by_Albums'),
            tooltip=['album', 'date', 'avg_feature'],
            color=alt.condition(selector, 'album:O', alt.value('lightgray'),
                                legend=None),
        ).properties(
            width=1200,
            height=600,
        ).add_selection(selector)

    # chart - decade: per-decade average overlay for comparison
    bar_decade = alt.Chart(all_dacade_avg).mark_bar(
        color='#8624F5', opacity=0.5, thickness=10).encode(
            alt.X('album',
                  sort={
                      "field": "date",
                      "order": "ascending"
                  },
                  title="(V)The correspondent decade of albums Issued Date"),
            alt.Y('trend_feature',
                  scale=alt.Scale(zero=False),
                  title='(V)Average_' + option + '_by_Decade'),
        ).properties(
            width=1200,
            height=600,
        )  #.add_selection(selector)

    # chart - decade - the numbers printed on the bars
    text_decade = bar_decade.mark_text(align='center', color='white',
                                       dy=80).encode(text='avg_feature:N')

    # checkbox - chart comparison
    agree = st.checkbox('Compare the\n' + option +
                        '\nof the albums with the average\n' + option +
                        '\nof songs by decede.')
    if agree:
        st.write(bar_decade + bar_album + text_decade)
    else:
        st.write(bar_album)

    agree = st.checkbox('show original dataset.', key='decade')
    if agree:
        st.text(
            'David Bowie album average feature and all songs averge feature by decade'
        )
        st.dataframe(all_dacade_avg)

    # Paragraph - Search (stub)
    st.header("Search with features!")
    st.subheader(":musical_note: What are the songs that fit my mood?")
    st.markdown("Instruction: blablabla.")
def slope_comparison(df, values, bars, col, text, bars_w=200, bars_h=515,
                     slope_avg='Average', slope_w=350, slope_h=240,
                     slope_y_pos=10, slope_y_title=None):
    '''
    Plots 3 charts: v-bars and 2 slopegraph for comparison.

    One slopegraph follows the mouseover selection, the other follows the
    click selection, so two categories can be compared side by side.

    Parameters
    ----------
    df : pandas.DataFrame
    values : str
        Name of the column used for values.
    bars : str
        Name of the column used to plot as X-axis on the bars.
    col : str
        Name of the column used for colors.
    text : str
        Name of the column used to show text on slopegraph.
    bars_w : int
        Bars plot width.
    bars_h : int
        Bars plot height.
    slope_avg : str
        Title for the avg measures on slopegraph.
    slope_w : int
        Slopegraph plot width.
    slope_h : int
        Slopegraph plot height.
    slope_y_pos : int
        Slopegraph titles position.
    slope_y_title : str
        Title to use on slope y axis.

    Returns
    -------
    altair.Chart
    '''
    df = df.copy()
    # Tag raw rows as 'measures'; per-col averages appended below are tagged
    # 'averages' so the point/rule charts can exclude them.
    df['slope_x'] = 'measures'
    df_avg = df.groupby(col).mean().reset_index()
    df_avg[bars] = slope_avg
    df_avg['slope_x'] = 'averages'
    df = pd.concat([df, df_avg], ignore_index=True, sort=True)
    df[values] = df[values].round(2)
    df['slope_text'] = df[values].astype(str) + ' ' + df[col]
    # Two independent selections: hover drives one slopegraph, click the other.
    mouse = alt.selection_single(on='mouseover', fields=[bars], empty='none',
                                 nearest=True)
    click = alt.selection_single(fields=[bars], empty='none')
    base = alt.Chart(df)
    # Mean-value dot plot; hovered dot grows larger.
    barsplot = base.mark_point(filled=True).encode(
        alt.X(f'mean({values})',
              scale=alt.Scale(zero=False),
              axis=alt.Axis(title=None)),
        alt.Y(f'{bars}:N', axis=alt.Axis(title=None)),
        size=alt.condition(mouse, alt.value(400), alt.value(
            200))).transform_filter('datum.slope_x == "measures"').properties(
                selection=mouse, width=bars_w, height=bars_h)
    # Second layer: clicked dot is recolored maroon and enlarged.
    barsplot += barsplot.encode(
        size=alt.condition(click, alt.value(350), alt.value(200)),
        color=alt.condition(click, alt.ColorValue('#800000'),
                            alt.value('#879cab'))).properties(selection=click)
    # Confidence-interval rules (Vega ci0/ci1 aggregates) per category.
    bars_ci = base.mark_rule().encode(
        x=f'ci0({values})', x2=f'ci1({values})',
        y=f'{bars}:N').transform_filter(
            'datum.slope_x == "measures"').properties(width=bars_w,
                                                      height=bars_h)
    slope_mouse = _build_slope(df, values, None, bars, col, text, mouse,
                               slope_y_pos, slope_w, slope_h, slope_y_title)
    slope_click = _build_slope(df, values, None, bars, col, text, click,
                               slope_y_pos, slope_w, slope_h)
    # Layout: dot plot with CI rules on the left, stacked slopegraphs right.
    chart = (bars_ci + barsplot) | (slope_mouse & slope_click)
    return chart
def pdp_plot_filter(filter_in, df, rows, columns, values, variables,
                    clusters=True, cluster_centers=3, cluster_lines=True,
                    columns_type='N', x_title=None, y_title=None, width=700,
                    height=400):
    """Build a partial-dependence line plot filtered by an external selection.

    Layers (back to front): optional thin per-row lines, optional cluster
    lines (hover-highlighted), and the average curve drawn as a gold halo
    with a thin line and points on top.

    Parameters
    ----------
    filter_in : altair selection
        External selection applied as a transform_filter to every layer.
    df : pandas.DataFrame
    rows, columns, values, variables : str
        Column names: one line per `rows` value, X from `columns`,
        Y from `values`; `variables` joins the averaging groupby.
    clusters : bool
        Draw cluster-center lines (via utils.pdp_clusters) instead of raw rows.
    cluster_centers : int
        Number of cluster centers.
    cluster_lines : bool
        Also draw the raw per-row lines (dashed) behind the clusters.
    columns_type : str
        Altair type code for the X encoding ('N', 'O', 'Q', ...).
    x_title, y_title : str, optional
        Axis titles.
    width, height : int
        Plot dimensions.

    Returns
    -------
    altair.Chart
        Layered chart: background lines + average curve.
    """
    df = df.copy()

    def get_lines(data, stroke_w, color, selection=None, **kwargs):
        # One line per `rows` value; opacity channel keeps lines distinct
        # without a legend. Extra mark kwargs (e.g. strokeDash) pass through.
        lines = alt.Chart(data).mark_line(
            strokeWidth=stroke_w, **kwargs).encode(
                alt.X(f'{columns}:{columns_type}',
                      title=x_title,
                      axis=alt.Axis(minExtent=30)),
                alt.Y(values, title=y_title),
                alt.Opacity(rows, legend=None),
                alt.ColorValue(color)).transform_filter(filter_in).properties(
                    width=width, height=height)
        if selection:
            # Double the stroke width of the hovered line.
            lines = lines.encode(size=alt.condition(
                selection, alt.value(stroke_w * 2),
                alt.value(stroke_w))).properties(
                    selection=selection)
        return lines

    if clusters:
        mouseover_cluster = alt.selection_single(on='mouseover',
                                                 fields=[rows],
                                                 empty='none',
                                                 nearest=True)
        df_clusters = utils.pdp_clusters(cluster_centers, df, rows, columns,
                                         values, variables)
        background = get_lines(df_clusters, 2, '#468499',
                               selection=mouseover_cluster)
    else:
        background = get_lines(df, 1, '#bbbbbb')
    if cluster_lines:
        # mouseover_lines = alt.selection_single(on='mouseover', fields=[rows], empty='none', nearest=True)
        # Raw per-row lines, dashed, drawn behind the cluster layer.
        background = get_lines(df, 1, '#bbbbbb', strokeDash=[2, 2
                                                             ]) + background
    # Average curve across all rows for each (columns, variables) pair.
    df_avg = df.groupby([columns, variables])[values].mean().reset_index()
    avg_base = alt.Chart(df_avg).encode(
        alt.X(f'{columns}:{columns_type}', title=x_title),
        alt.Y(values, title=y_title),
    ).transform_filter(filter_in)
    # Gold halo underneath, thin line and filled points on top.
    avg = avg_base.mark_line(strokeWidth=5, color='gold')
    avg += avg_base.mark_line(strokeWidth=2)
    avg += avg_base.mark_point(filled=True, size=55)
    return background + avg
import altair as alt import pandas as pd import numpy as np # generate fake data source = pd.DataFrame({'gender': ['M']*1000 + ['F']*1000, 'height':np.concatenate((np.random.normal(69, 7, 1000), np.random.normal(64, 6, 1000))), 'weight': np.concatenate((np.random.normal(195.8, 144, 1000), np.random.normal(167, 100, 1000))), 'age': np.concatenate((np.random.normal(45, 8, 1000), np.random.normal(51, 6, 1000))) }) selector = alt.selection_single(empty='all', fields=['gender']) color_scale = alt.Scale(domain=['M', 'F'], range=['#1FC3AA', '#8624F5']) base = alt.Chart(source).properties( width=250, height=250 ).add_selection(selector) points = base.mark_point(filled=True, size=200).encode( x=alt.X('mean(height):Q', scale=alt.Scale(domain=[0,84])), y=alt.Y('mean(weight):Q', scale=alt.Scale(domain=[0,250])), color=alt.condition(selector,
columns=["order"]) medal_count_year = medal_count_year.sort_values(by=["Year", "Medal"], ascending=[True, False ]).reset_index() medal_count_year["Order"] = count_year # merge count with order and count medals by categories medal_count_year_withCate = pd.merge(medal_count_year, merge_medalCate, how='outer', on=["Year", "NOC"]) # Altair part slider_year = alt.binding_range(min=1896, max=2016, step=4, name='Year:') selector_year = alt.selection_single(fields=['Year'], bind=slider_year, init={'Year': 2016}) select_country = alt.selection(type="single", fields=['Year']) sphere = alt.sphere() graticule = alt.graticule() background1 = alt.Chart(sphere).mark_geoshape(fill='lightgray') background2 = alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5) chart_medal_year = alt.Chart(medal_count_year_withCate).mark_geoshape( stroke='darkgray').encode( color=alt.Color(field="Medal", type="quantitative", scale=alt.Scale(type="sqrt"), legend=alt.Legend(title="Medals", labelFontSize=15,
def plot_interactive_histograms_sm():
    """Serve three linked histograms (connectedness, reliability,
    interdependency) over the 2018 monthly network results, filterable by
    month and by network layer via dropdown selectors.

    Side effects: downloads the monthly CSVs and node list from GitHub and
    starts a local web server via ``chart.serve()``. Returns None.
    """
    base_url = ('https://raw.githubusercontent.com/jamescoller/'
                'multilayer_design_network_tool/master/')
    months = [
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December'
    ]

    node_info = pd.read_csv(base_url + 'Data/Node_List.csv')

    # One results file per month of 2018; tag each frame with its month name
    # and attach node metadata before stacking into a single frame.
    # (The original also fetched results/summary_data.csv into an unused
    # local; that dead network call was dropped.)
    monthly_frames = []
    for month_num, month_name in enumerate(months, start=1):
        df = pd.read_csv(
            f'{base_url}results/all_data2018-{month_num:02d}-01.csv')
        df['month'] = month_name
        monthly_frames.append(
            pd.merge(df, node_info, how='inner', left_on='NodeID',
                     right_on='ID'))
    all_data = pd.concat(monthly_frames)

    layers = ['Algorithm', 'Physical', 'Task', 'Function', 'Information']

    # Shared dropdown selections: one for the month, one for the layer.
    month_selection = alt.selection_single(
        fields=['month'], bind=alt.binding_select(options=months),
        name='Month')
    layer_selection = alt.selection_single(
        fields=['Layer'], bind=alt.binding_select(options=layers),
        name='Layer')

    def _histogram(field, x_title, color):
        # A 20-bin histogram of `field`, filtered by both dropdowns.
        return alt.Chart(all_data).mark_bar().encode(
            x=alt.X(f'{field}:Q', bin=alt.Bin(maxbins=20), title=x_title),
            y=alt.Y('count()', title='Number of Nodes'),
            color=alt.value(color),
        ).add_selection(month_selection).transform_filter(
            month_selection).add_selection(layer_selection).transform_filter(
                layer_selection)

    cn = _histogram('Cn', 'Connectedness Rating', '#4e79a7')
    rn = _histogram('Rn', 'Reliability Rating', '#f28e2b')
    # Renamed from `id`, which shadowed the builtin.
    interdependency = _histogram('Id', 'Interdependency Rating', '#e15759')

    chart = alt.hconcat(cn, rn, interdependency)
    chart.serve()
    return
# -*- coding: utf-8 -*- """ Spyder Editor This is a temporary script file. """ import altair as alt import pandas as pd from vega_datasets import data as vega_data morse = pd.read_csv( 'https://raw.githubusercontent.com/jvelleu/649_final_project/7648fb227384b3dad2707efdf8fb960f502b52ad/morse_data.csv', encoding='utf-8') mouseSelection = alt.selection_single(on="mouseover", nearest=True, empty='none') opacityCondition = alt.condition(mouseSelection, alt.value(1.0), alt.value(0.6)) scatter1 = alt.Chart( morse, width=400, height=400).mark_point(filled=True).encode( alt.X("x", title="", axis=None), alt.Y("y", title="", axis=None), alt.Tooltip(["char", "code"], title=None), alt.Size("components:O")).add_selection(mouseSelection).encode( opacity=opacityCondition) scatter2 = alt.Chart( morse, width=400, height=400).mark_point(filled=True).encode( alt.X("x", title="", axis=None),
def plot_iroas_over_time(iroas_df: pd.DataFrame,
                         experiment_dates: pd.DataFrame,
                         cooldown_date: pd.DataFrame):
  """Returns a chart of the iROAS estimate over time with confidence bands.

  This function provides a visualization of the evolution of the iROAS
  estimate over the duration of the experiment and cooldown, together with
  confidence bands.

  Args:
    iroas_df: a dataframe with columns: date, lower, mean, upper
    experiment_dates: dataframe with columns (date, color) which contains
      two dates for each period (start, end), and the column color is the
      label used in the chart to refer to the corresponding period, e.g.
      "Experiment period" or "Pretest period".
    cooldown_date: dataframe with column (date, color) with only one entry,
      where date indicates the last day in the cooldown period, and color is
      the label used in the plot legend, e.g. "End of cooldown period".

  Returns:
    iroas_chart: Chart containing the plot.
  """
  # Shared x-axis (date) used by every iROAS layer.
  iroas_base = alt.Chart(iroas_df).mark_line().encode(
      x=alt.X('date:T', axis=alt.Axis(title='', format=('%b %e'))))

  # Hover selection snapping to the nearest date; cleared on mouseout.
  iroas_selection = alt.selection_single(fields=['date'],
                                         nearest=True,
                                         on='mouseover',
                                         empty='none',
                                         clear='mouseout')

  # Mean iROAS estimate over time.
  iroas_lines = iroas_base.mark_line().encode(
      y=alt.Y('mean:Q', axis=alt.Axis(title=' ', format='.1')))

  # Point marker shown only at the hovered date.
  iroas_points = iroas_lines.mark_point().transform_filter(iroas_selection)

  # Vertical rules carrying the tooltip; faintly visible only when hovered.
  iroas_rule1 = iroas_base.mark_rule().encode(
      tooltip=['date:T', 'mean:Q', 'lower:Q', 'upper:Q'])
  iroas_rule = iroas_rule1.encode(
      opacity=alt.condition(iroas_selection, alt.value(0.3), alt.value(
          0))).add_selection(iroas_selection)

  # Shaded confidence band between the lower and upper bounds.
  iroas_ci_bands_rule = alt.Chart(iroas_df).mark_area(color='gray').encode(
      alt.X('date:T'), y='lower:Q', y2='upper:Q', opacity=alt.value(0.5))

  # Vertical rules marking the experiment period start/end; the color scale
  # also declares the legend entries for the cooldown rule and the estimate.
  date_rule = alt.Chart(experiment_dates[
      experiment_dates['color'] == 'Experiment period']).mark_rule(
          strokeWidth=2).encode(x='date:T',
                                color=alt.Color('color',
                                                scale=alt.Scale(domain=[
                                                    'Experiment period',
                                                    'End of cooldown period',
                                                    'iROAS estimate'
                                                ],
                                                                range=[
                                                                    'black',
                                                                    'black',
                                                                    '#1f77b4'
                                                                ])))

  # Dashed rule marking the last day of the cooldown period.
  cooldown_date_rule = alt.Chart(cooldown_date).mark_rule(
      strokeWidth=2, strokeDash=[5, 2],
      color='black').encode(x='date:T', color='color:N')

  # Compile chart
  iroas_chart = alt.layer(iroas_lines, iroas_rule, iroas_points, date_rule,
                          cooldown_date_rule, iroas_ci_bands_rule)

  return iroas_chart
def chart(self):
    """Build and persist the ticker-mentions dashboard.

    Produces a bar chart of per-ticker mention counts (filterable by date
    range and count sliders) linked to data tables (created date, ticker,
    upvotes, title/comment) for the clicked ticker, then saves the chart
    spec via ``self.save_semantic_chart``. Returns None.
    """
    df = self.clean_ticker(self.transform(self.read_curated()))
    # Drop rows whose "ticker" is actually a common word (self.words).
    # NOTE(review): the original author doubted this filter works — verify.
    df = df.query("ticker not in @self.words")
    # used in data table
    df["date_str"] = df["created"].map(
        lambda x: x.strftime("%Y-%m-%d %H:%M"))
    # used for date filters
    df["date"] = df["created"].map(lambda x: x.strftime("%Y-%m-%d"))
    df["date2"] = df["created"].map(lambda x: x.strftime("%Y-%m-%d"))
    data_start = df["date"].min()
    data_end = df["date"].max()

    # DATETIME RANGE FILTERS
    # https://github.com/altair-viz/altair/issues/2008#issuecomment-621428053
    range_start = alt.binding(input="date")
    range_end = alt.binding(input="date")
    select_range_start = alt.selection_single(name="start",
                                              fields=["date"],
                                              bind=range_start,
                                              init={"date": data_start})
    select_range_end = alt.selection_single(name="end",
                                            fields=["date"],
                                            bind=range_end,
                                            init={"date": data_end})

    # Count sliders bounding how many mentions a ticker needs to be shown.
    max_count = df.groupby(["ticker"])["ticker"].count().max()
    slider_max = alt.binding_range(min=0, max=max_count, step=1)
    slider_min = alt.binding_range(min=0, max=max_count, step=1)
    select_max_count = alt.selection_single(name='ticker_max',
                                            fields=['count'],
                                            bind=slider_max,
                                            init={"count": max_count})
    select_min_count = alt.selection_single(name='ticker_min',
                                            fields=['count'],
                                            bind=slider_min,
                                            init={"count": 0})

    # Click-a-bar selection, shared by the bar chart and the data tables.
    selector = alt.selection_single(empty='all', fields=['ticker'])

    base = alt.Chart(df.reset_index()).transform_filter(
        (alt.datum.date2 >= select_range_start.date)
        & (alt.datum.date2 <= select_range_end.date)).add_selection(
            selector,
            select_range_start,
            select_range_end,
            select_max_count,
            select_min_count,
        )

    # BAR CHART
    # https://stackoverflow.com/questions/52385214/how-to-select-a-portion-of-data-by-a-condition-in-altair-chart
    bars = base.mark_bar().transform_aggregate(
        count='count()', groupby=['ticker']).encode(
            x=alt.X(
                'ticker',
                # https://altair-viz.github.io/gallery/bar_chart_sorted.html
                sort="-y",
                axis=alt.Axis(title='Stock Tickers')),
            y=alt.Y(
                "count:Q",
                axis=alt.Axis(title='Number of Mentions'),
            ),
            color=alt.condition(selector,
                                'id:O',
                                alt.value('lightgray'),
                                legend=None),
            tooltip=['ticker', 'count:Q'],
        ).properties(width=1400, height=400).transform_filter(
            (alt.datum.count <= select_max_count.count)
            & (alt.datum.count >= select_min_count.count))

    # Base chart for the data tables: text marks that double as hyperlinks.
    # href: https://altair-viz.github.io/gallery/scatter_href.html
    ranked_text = base.transform_calculate(
        url=alt.datum.built_url).mark_text(
            align='left',
            dx=-12,
            color="white",
            strokeWidth=0,
            strokeOpacity=0,
        ).encode(
            y=alt.Y('row_number:O', axis=None),
            href='url:N',
            tooltip=['url:N'],
        ).transform_window(
            # https://altair-viz.github.io/user_guide/generated/core/altair.SortField.html#altair.SortField
            sort=[
                alt.SortField("score", "descending"),
                alt.SortField("created", "descending"),
            ],
            row_number='row_number()').transform_filter(
                selector).transform_window(
                    rank='rank(row_number)').transform_filter(
                        # only shows up to 20 rows
                        alt.datum.rank < 20).properties(width=30, height=300)

    # Data Tables: one column per field of the selected ticker's posts.
    created = ranked_text.encode(text='date_str').properties(
        title='Created Date')
    ticker = ranked_text.encode(text='ticker').properties(
        title='Stock Ticker')
    score = ranked_text.encode(text='score').properties(title='Upvotes')
    title = ranked_text.encode(text="title" if "title" in self.
                               cols_with_ticker else "comment").properties(
                                   title='Submission Title'
                                   if "title" in self.cols_with_ticker else
                                   'Comment')

    # Combine data tables
    text = alt.hconcat(created, ticker, score, title)

    # Build final chart
    chart = alt.vconcat(
        bars,
        text,
    ).resolve_legend(color="independent")
    self.save_semantic_chart(chart.to_json(indent=None))
    return
# Interactive scatter of the cars dataset: a dropdown filters by country of
# origin; the selected origin keeps its color, the rest turn light gray.
import altair as alt
from vega_datasets import data

source = data.cars()

input_dropdown = alt.binding_select(options=['Europe', 'Japan', 'USA'])
selection = alt.selection_single(fields=['Origin'],
                                 bind=input_dropdown,
                                 name='Country of ')
color = alt.condition(selection, alt.Color('Origin:N', legend=None),
                      alt.value('lightgray'))

vega = alt.Chart(source).mark_circle(size=60).encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color=color,
).add_selection(selection)

vega.save('a.html')
def map_state_slider(state_txt, state_counties, confirmed, confirmed_min,
                     confirmed_max, deaths, deaths_min, deaths_max,
                     state_fips):
    """Choropleth of confirmed COVID-19 cases by county with death bubbles,
    animated by a day-number slider.

    Args:
        state_txt: state name used in the chart title.
        state_counties: geo data for the state's county boundaries.
        confirmed: dataframe with fips, day_num, confirmed columns.
        confirmed_min, confirmed_max: color-scale domain for confirmed cases.
        deaths: dataframe with lat, long_, day_num, deaths columns.
        deaths_min, deaths_max: size-scale domain for deaths.
        state_fips: state FIPS code used to filter counties.

    Returns:
        Layered Altair chart (boundaries + choropleth + death points).
    """
    # Pivot confirmed data to one column per day_num.
    confirmed_pv = confirmed[['fips', 'day_num', 'confirmed']].copy()
    confirmed_pv['fips'] = confirmed_pv['fips'].astype(str)
    confirmed_pv['day_num'] = confirmed_pv['day_num'].astype(str)
    confirmed_pv['confirmed'] = confirmed_pv['confirmed'].astype('int64')
    confirmed_pv = confirmed_pv.pivot_table(index='fips',
                                            columns='day_num',
                                            values='confirmed',
                                            fill_value=0).reset_index()

    # Pivot deaths data to one column per day_num, keyed by coordinates.
    deaths_pv = deaths[['lat', 'long_', 'day_num', 'deaths']].copy()
    deaths_pv['day_num'] = deaths_pv['day_num'].astype(str)
    deaths_pv['deaths'] = deaths_pv['deaths'].astype('int64')
    deaths_pv = deaths_pv.pivot_table(index=['lat', 'long_'],
                                      columns='day_num',
                                      values='deaths',
                                      fill_value=0).reset_index()

    # Day-number columns drive the slider; drop the leading 'fips' column.
    column_names = confirmed_pv.columns.tolist()
    column_names.pop(0)
    column_values = [int(name) for name in column_names]

    # Disable max_rows to see more data
    alt.data_transformers.disable_max_rows()

    # Topographic information (the unused 'states' feature was removed).
    us_counties = alt.topo_feature(topo_usa, 'counties')

    # State county boundaries.
    base_state = alt.Chart(state_counties).mark_geoshape(
        fill='white',
        stroke='lightgray',
    ).properties(
        width=800,
        height=600,
    ).project(type='mercator')

    # Slider over the observed day numbers.
    min_day_num = column_values[0]
    max_day_num = column_values[-1]
    slider = alt.binding_range(min=min_day_num, max=max_day_num, step=1)
    slider_selection = alt.selection_single(fields=['day_num'],
                                            bind=slider,
                                            name="day_num",
                                            init={'day_num': min_day_num})

    # Confirmed cases by county, folded to long form and filtered by state.
    base_state_counties = alt.Chart(us_counties).mark_geoshape(
        stroke='black', strokeWidth=0.05).transform_lookup(
            lookup='id',
            from_=alt.LookupData(confirmed_pv, 'fips', column_names)
        ).transform_fold(column_names, as_=[
            'day_num', 'confirmed'
        ]).transform_calculate(
            state_id="(datum.id / 1000)|0",
            day_num='parseInt(datum.day_num)',
            confirmed='isValid(datum.confirmed) ? datum.confirmed : -1'
        ).encode(color=alt.condition(
            'datum.confirmed > 0',
            alt.Color('confirmed:Q',
                      scale=alt.Scale(domain=(confirmed_min, confirmed_max),
                                      type='symlog')),
            alt.value('white')
        )).properties(
            # Fix: the title previously hard-coded "WA State" even though
            # the caller passes state_txt; use the parameter.
            title=f'COVID-19 {state_txt} State Confirmed Cases by County'
        ).transform_filter((alt.datum.state_id
                            ) == state_fips).transform_filter(slider_selection)

    # Deaths by longitude/latitude, sized by count.
    points = alt.Chart(deaths_pv).mark_point(
        opacity=0.75, filled=True).transform_fold(
            column_names, as_=['day_num', 'deaths']).transform_calculate(
                day_num='parseInt(datum.day_num)',
                deaths='isValid(datum.deaths) ? datum.deaths : -1').encode(
                    longitude='long_:Q',
                    latitude='lat:Q',
                    size=alt.Size('deaths:Q',
                                  scale=alt.Scale(domain=(deaths_min,
                                                          deaths_max),
                                                  type='symlog'),
                                  title='deaths'),
                    color=alt.value('#BD595D'),
                    stroke=alt.value('brown'),
                ).add_selection(slider_selection).transform_filter(
                    slider_selection)

    # Confirmed cases (base_state_counties) and deaths (points).
    return (base_state + base_state_counties + points)
def selectors_figure(text, o_html, full_pds, ts, ts_step, decays, decays_step,
                     knns, knns_step):
    """Write an interactive PHATE-embedding scatter figure to *o_html*.

    Adds one slider per hyper-parameter (knn, decay, t) whose step is
    truthy, and — when a 'variable' column is present — one sub-plot per
    variable dtype (categorical / numerical) with a dropdown selector.

    Args:
        text: main title of the figure.
        o_html: output path for the saved HTML chart.
        full_pds: dataframe with PHATE1/PHATE2 coordinates and, optionally,
            'variable'/'dtype' columns plus one column per hyper-parameter.
        ts, decays, knns: explored values for each hyper-parameter.
        ts_step, decays_step, knns_step: slider step sizes; a falsy step
            disables the corresponding slider.
    """
    subtext = ['Parameters:']
    tooltip = ['sample_name', 'PHATE1', 'PHATE2']
    # Base scatter of the PHATE embedding.
    circ = alt.Chart(full_pds).mark_point(size=20).encode(x='PHATE1:Q',
                                                          y='PHATE2:Q')
    if knns_step:
        # Slider over the knn ("k") parameter.
        slider_knns = alt.binding_range(min=min(knns),
                                        max=max(knns),
                                        step=knns_step,
                                        name='knn')
        selector_knns = alt.selection_single(name="knn",
                                             fields=['knn'],
                                             bind=slider_knns,
                                             init={'knn': min(knns)})
        tooltip.append('knn')
        circ = circ.add_selection(selector_knns).transform_filter(
            selector_knns)
        subtext.append('knn ("k") = %s\n' % ', '.join(map(str, knns)))
    if decays_step:
        # Slider over the decay ("alpha") parameter.
        slider_decays = alt.binding_range(min=min(decays),
                                          max=max(decays),
                                          step=decays_step,
                                          name='decay')
        selector_decays = alt.selection_single(name="decay",
                                               fields=['decay'],
                                               bind=slider_decays,
                                               init={'decay': min(decays)})
        tooltip.append('decay')
        circ = circ.add_selection(selector_decays).transform_filter(
            selector_decays)
        subtext.append('decay ("alpha") = %s\n' % ', '.join(map(str, decays)))
    if ts_step:
        # Slider over the diffusion time ("t") parameter.
        slider_ts = alt.binding_range(min=min(ts),
                                      max=max(ts),
                                      step=ts_step,
                                      name='t:')
        selector_ts = alt.selection_single(name="t",
                                           fields=['t'],
                                           bind=slider_ts,
                                           init={'t': min(ts)})
        tooltip.append('t')
        circ = circ.add_selection(selector_ts).transform_filter(selector_ts)
        subtext.append('t = %s\n' % ', '.join(map(str, ts)))
    has_cats = 0
    has_nums = 0
    if 'variable' in full_pds.columns:
        dtypes_set = set(full_pds['dtype'])
        if 'categorical' in dtypes_set:
            cats = full_pds.loc[full_pds.dtype == 'categorical']
            # Default dropdown choice: longest non-nan variable name.
            cats_init = sorted(
                [x for x in cats['variable'] if str(x) != 'nan'],
                key=lambda x: -len(x))[0]
            cats_dropdown = alt.binding_select(
                options=cats['variable'].unique(), name='variable:')
            cats_select = alt.selection_single(fields=['variable'],
                                               bind=cats_dropdown,
                                               name="categorical variable",
                                               init={'variable': cats_init})
            cats_plot = make_subplot(circ, cats_select, list(tooltip), 'N')
            has_cats = 1
        if 'numerical' in dtypes_set:
            nums = full_pds.loc[full_pds.dtype == 'numerical']
            # NOTE(review): reuses the `cats_init` name for the numerical
            # default; works, but the name is misleading.
            cats_init = sorted(
                [x for x in nums['variable'] if str(x) != 'nan'],
                key=lambda x: -len(x))[0]
            nums_dropdown = alt.binding_select(
                options=nums['variable'].unique(), name='variable:')
            nums_select = alt.selection_single(fields=['variable'],
                                               bind=nums_dropdown,
                                               name="numerical variable",
                                               init={'variable': cats_init})
            nums_plot = make_subplot(circ, nums_select, list(tooltip), 'Q')
            has_nums = 1
    # NOTE(review): `title` is built (and optionally given a subtitle) but
    # never attached to a chart in this function — presumably consumed by
    # make_subplot via closure elsewhere, or dead code; verify.
    title = {
        "text": text,
        "color": "black",
    }
    if subtext != ['Parameters:']:
        # Only show the parameter subtitle when at least one slider exists.
        title.update({
            "subtitle": (subtext + ["(based on altair)"]),
            "subtitleColor": "grey"
        })
    if has_nums and has_cats:
        circ = alt.hconcat(cats_plot, nums_plot)
    elif has_nums:
        circ = nums_plot
    elif has_cats:
        circ = cats_plot
    circ.save(o_html)
    print('-> Written:', o_html)
COUNTY = pd.read_csv("health_ineq_online_table_12.csv", encoding = "latin-1") COUNTY["cty"] = COUNTY["cty"].astype(int) COVID["geo_value"] = COVID["geo_value"].astype(int) DATA = COUNTY.join(COVID.set_index("geo_value"), how = "inner", on = "cty") DATA['Date'] = pd.to_datetime(DATA.time_value) DATA['Date'] = DATA.Date.dt.strftime('%d').astype(int) return DATA[["Date", "cty", "statename", "state_id", "county_name", "value", "median_house_value", "puninsured2010"]] DATA = load_data() st.title("Percentage of (COVID) Doctor Visits by State and County") st.write("In this section, we explore the percentage of doctor visits for COVID by State and County. We begin by hilighting Pennsylvania and as we can see, there are some interesting observatins for the state. ") alt.data_transformers.disable_max_rows() slider = alt.binding_range(min=1, max=31, step=1) select_date = alt.selection_single(name="January", fields=['Date'], bind=slider, init={'Date':1}) state_selector = alt.selection_multi(fields=['statename'], init=[{'statename':'Pennsylvania'}]) States = alt.Chart(DATA).mark_bar().encode( x=alt.X('value:Q', title="% of Visits to Doctor about COVID", aggregate="mean", scale=alt.Scale(domain=[0, 35])), y=alt.Y('statename:N', title="State"), color=alt.condition(state_selector, alt.value("#f76f5c"), alt.value("#451076")), tooltip=[alt.Tooltip("statename:N", title='State'), alt.Tooltip("value:Q", aggregate="mean", title="% of COVID Doctor Visits", format='.2f')] ).add_selection( state_selector ).add_selection( select_date ).transform_filter( select_date).interactive()
import pandas as pd import altair as alt df2 = pd.read_excel('mianjiandjiage.xlsx') df0 = pd.read_excel('ziru.xlsx') #广州各区租金与面积关系 areas = [ '请选择区域', '荔湾区', '白云区', '海珠区', '黄埔区', '萝岗区', '南沙区', '从化区', '花都区', '番禺区', '天河区', '越秀区', '增城区' ] areas_dropdown = alt.binding_select(options=areas) areas_select = alt.selection_single(fields=['区域'], bind=areas_dropdown, name="district") mj = alt.Chart(df2).mark_bar().encode(x=alt.X( "区域", sort=alt.EncodingSortField(field="平均使用面积/㎡")), y=alt.Y("平均使用面积/㎡"), color='平均使用面积/㎡') mj2 = alt.Chart(df2).mark_point(color="orange").encode(x=alt.X( "区域", sort=alt.EncodingSortField(field="平均使用面积/㎡")), y=alt.Y("每月平均租金/元")) mj3 = alt.Chart(df2).mark_text(color="red").encode( x=alt.X("区域", sort=alt.EncodingSortField(field="平均使用面积/㎡")), y=alt.Y("每月平均租金/元")).add_selection(areas_select).transform_filter( areas_select) #mark_text(align='left', dx=5) zj = alt.Chart(df2).mark_line(color="orange").encode(
"datum.Production_Budget > 100000000.0 ? 100 : 10", Release_Year="year(datum.Release_Date)").transform_filter( alt.datum.IMDB_Rating > 0).transform_filter( alt.FieldOneOfPredicate( field='MPAA_Rating', oneOf=ratings)).encode(x=alt.X( 'Worldwide_Gross:Q', scale=alt.Scale(domain=(100000, 10**9), clamp=True)), y='IMDB_Rating:Q', tooltip="Title:N") # A slider filter year_slider = alt.binding_range(min=1969, max=2018, step=1) slider_selection = alt.selection_single(bind=year_slider, fields=['Release_Year'], name="Release Year_") filter_year = base.add_selection(slider_selection).transform_filter( slider_selection).properties(title="Slider Filtering") # A dropdown filter genre_dropdown = alt.binding_select(options=genres) genre_select = alt.selection_single(fields=['Major_Genre'], bind=genre_dropdown, name="Genre") filter_genres = base.add_selection(genre_select).transform_filter( genre_select).properties(title="Dropdown Filtering") #color changing marks
def plot_results_timeconstant_static():
    """Faceted chart of the rate of correct SEM estimates versus timeseries
    length, one facet per autocorrelation parameter, with a legend-bound
    selection that highlights a single method across all layers.
    """

    def _legend_toggle():
        # A fresh legend-bound selection per layer (as in the original,
        # each layer carries its own selection signal), plus the matching
        # opacity condition.
        sel = alt.selection_single(fields=['methodName'], empty='all',
                                   bind='legend')
        return sel, alt.condition(sel, alt.value(1.0), alt.value(0.5))

    # Shared base: jitter x slightly and expose the CI bounds and the 95%
    # reference value as calculated fields.
    base = alt.Chart(data).transform_calculate(
        x_jittered='0.15*random()*datum.taus+datum.taus',
        ymin="datum.confIntLow",
        ymax="datum.confIntHigh",
        goal='0.95')

    # Scatter of the observed rates.
    sel_pts, op_pts = _legend_toggle()
    scatter = base.mark_point(filled=True).add_selection(sel_pts).encode(
        x=alt.X('x_jittered:Q', scale=alt.Scale(type='log'),
                title='Length of Timeseries (τ)'),
        y=alt.Y('rate:Q', scale=alt.Scale(domain=[0, 1.04]),
                title='Rate of correct SEM'),
        size=alt.value(80),
        color=alt.condition(sel_pts, col, alt.value('lightgrey')),
        opacity=op_pts)

    # Connecting lines per method.
    sel_line, op_line = _legend_toggle()
    trend = base.mark_line().add_selection(sel_line).encode(
        x=alt.X('x_jittered:Q'),
        y=alt.Y('rate:Q'),
        color=alt.condition(sel_line, col, alt.value('lightgrey')),
        opacity=op_line)

    # Horizontal reference rule at the 95% target.
    goal_rule = base.mark_rule(color='black').encode(alt.Y('goal:Q'))

    # Vertical confidence-interval whiskers.
    sel_err, op_err = _legend_toggle()
    whiskers = base.mark_rule(strokeWidth=3).add_selection(sel_err).encode(
        alt.X("x_jittered:Q"),
        alt.Y("ymin:Q", title=''),
        alt.Y2("ymax:Q"),
        color=alt.condition(sel_err, col, alt.value('lightgrey')),
        opacity=op_err)

    chart = alt.layer(whiskers, scatter, trend, goal_rule).properties(
        width=250,
        height=200,
    ).facet(facet=alt.Facet('trueRho:N',
                            title='Autocorrelation parameter (ρ)'),
            columns=3)
    chart = chart.configure_header(titleColor='darkred',
                                   titleFontSize=16,
                                   labelColor='darkred',
                                   labelFontSize=14)
    chart = chart.configure_legend(strokeColor='gray',
                                   fillColor='#EEEEEE',
                                   padding=10,
                                   cornerRadius=10,
                                   orient='top')
    return chart
def get_timeline_plots(df_scores, selected_score, selected_score_axis,
                       selected_score_desc, use_states, countys):
    """Build the score-over-time line/point chart for states or counties.

    Args:
        df_scores: dataframe with at least 'name', 'date' and the selected
            score column.
        selected_score: name of the score column to plot.
        selected_score_axis: axis label for the score.
        selected_score_desc: human-readable description shown as the title.
        use_states: True for Bundesland (state) mode, False for Landkreis
            (county) mode.
        countys: county names to include (county mode only).

    Returns:
        A layered Altair chart, or None in county mode with no selection.
    """
    title = {
        "text": ["", selected_score_desc
                 ],  # use two lines as hack so the umlauts at Ö are not cut off
        "subtitle": "EveryoneCounts.de",
        "color": "black",
        "subtitleColor": "lightgray",
        "subtitleFontSize": 12,
        "subtitleFontWeight": "normal",
        "fontSize": 15,
        "lineHeight": 5,
    }
    # More distinct colors are needed for the (16) states than for a small
    # hand-picked set of counties.
    if use_states:
        titlestr = "Bundesland"
        scheme = 'category20'
    else:
        titlestr = "Landkreis"
        scheme = 'category10'
    if len(countys) > 0 and not use_states:
        # Landkreise (county) mode: keep only the selected counties.
        df_scores = df_scores[df_scores["name"].isin(countys)].dropna(
            axis=1, how="all")
        df_scores = df_scores[["name", "date", selected_score]].dropna()
    elif use_states:
        pass
    else:
        return None  # county mode, nothing selected

    # altair selectors: hover highlighting for whole lines and for single
    # circles (nearest point), both cleared on mouseout.
    highlight = alt.selection_single(empty="none",
                                     fields=['name'],
                                     on='mouseover',
                                     nearest=True,
                                     clear="mouseout")
    highlight_circles = alt.selection_single(empty="none",
                                             fields=['date', 'name'],
                                             on='mouseover',
                                             nearest=True,
                                             clear="mouseout")
    # charts
    base = alt.Chart(df_scores[[
        "name", "date", selected_score
    ]].dropna()).encode(x=alt.X('date:T',
                                axis=alt.Axis(title='Datum',
                                              format=("%d %b"))),
                        y=alt.Y(selected_score + ':Q',
                                title=selected_score_axis),
                        color=alt.Color('name',
                                        title=titlestr,
                                        scale=alt.Scale(scheme=scheme),
                                        legend=alt.Legend(orient="bottom",
                                                          columns=2)),
                        tooltip=[
                            alt.Tooltip("name:N", title=titlestr),
                            alt.Tooltip(selected_score + ":Q",
                                        title=selected_score_axis),
                            alt.Tooltip("date:T",
                                        title="Datum",
                                        format=("%A %d %B")),
                        ])
    # Hovered circle grows; hovered line thickens and becomes opaque.
    points = base.mark_circle().encode(
        opacity=alt.value(1),
        size=alt.condition(~highlight_circles, alt.value(40), alt.value(300)),
    ).add_selection(highlight).add_selection(highlight_circles).properties(
        width='container', height=450, title=title)
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(2), alt.value(6)),
        opacity=alt.condition(~highlight, alt.value(0.5), alt.value(1)))
    if selected_score in ["airquality_score", "webcam_score", "tomtom_score"]:
        return points + lines
    else:
        # add horizontal rule at 100%
        rule = alt.Chart(df_scores).mark_rule(color='lightgray').encode(
            y="a:Q").transform_calculate(a="100")
        return rule + points + lines
def getBattingChart():
    """Interactive batting-records chart.

    A year slider filters a clickable scatter of batting average vs. strike
    rate; clicking a point selects a player and drives a per-player career
    line chart (average and strike rate over years).

    Returns:
        The combined, configured Altair chart.
    """
    # (Removed: an unused `filename` built from settings.MEDIA_ROOT and a
    # debug print of the dataframe head.)
    source = pd.DataFrame(
        list(Batting.objects.all().values("name", "country", "average",
                                          "year", "strikerate", "runs")))
    alt.data_transformers.disable_max_rows()

    # Year slider: picking a year filters the scatter.
    slider = alt.binding_range(min=1990, max=2018, step=1)
    select_year = alt.selection_single(name="year",
                                       fields=['year'],
                                       on='none',
                                       clear='none',
                                       bind=slider,
                                       init={'year': 1998})
    # Clicking a point selects a single player for the line chart.
    singlePlayer = alt.selection_single(empty='none',
                                        fields=['name'],
                                        init={'name': 'SR Tendulkar'})

    domain = ["INDIA", "AUS", "PAK", "ENG", "SA", "NZ", "WI", "BAN", "SL"]
    range_ = [
        "#6baed6", "yellow", "green", "red", "orange", "black", "brown",
        "purple", "pink"
    ]

    base = alt.Chart(source).mark_circle().encode(
        x='average',
        y='strikerate',
        color=alt.Color('country',
                        legend=alt.Legend(title='Country', orient='left'),
                        scale=alt.Scale(domain=domain, range=range_)),
        tooltip=['name', 'country', 'average', 'strikerate']
    ).add_selection(
        select_year, singlePlayer
    ).transform_filter(
        # Only batsmen with a meaningful number of runs.
        datum.runs > 450
    ).transform_filter(select_year).properties(
        title="Batting Records Year Wise")

    # Selected player's name rendered as a watermark on the line chart.
    titleLine = alt.Chart(source).mark_text(dy=100,
                                            size=30,
                                            opacity=0.5,
                                            text='foo-baz',
                                            color='#d6616b').encode(
                                                text='name:N',
                                                opacity=alt.value(0.5)
                                            ).transform_filter(singlePlayer)

    # Static legend labels for the two line colors.
    label1 = alt.Chart(source).mark_text(align='left',
                                         dy=-140,
                                         size=15,
                                         opacity=0.5,
                                         text=' --- average',
                                         color='blue')
    label2 = alt.Chart(source).mark_text(dy=-140,
                                         size=15,
                                         align='right',
                                         opacity=0.5,
                                         text=' --- strikerate ',
                                         color='red')

    combinedLine = alt.Chart(source).mark_line(point=True).encode(x='year:Q')
    z = alt.layer(
        combinedLine.mark_line(color='blue', opacity=.5).encode(y='average'),
        combinedLine.mark_line(color='red',
                               opacity=.5).encode(y='strikerate')
    ).transform_filter(singlePlayer).properties(
        # Fix: title typo "Recored" -> "Record".
        title='Selected Player Record over Years')

    myChart = base | z + titleLine + label1 + label2
    myChart1 = myChart.configure_circle(
        filled=True,
        size=200,
    ).properties(autosize='fit')
    return myChart1
squad = allteams2.loc[index] a = make_team(squad, stand) df_ind = a.at[13, "player"] teams_dict[df_ind] = a stand = stand.sort_values("Total Points", ascending=0).reset_index(drop=True) # create full standing full_stand = roster_infos.merge(stand[["Team", "Total Points", "Num Alive"]], left_on = "GM", right_on = "Team") full_stand = full_stand.sort_values(["Total Points", "Num Alive"], ascending = [False, False]).reset_index(drop=True) full_stand_cols = ["GM", "Total Points", "Num Alive", "QB1", "QB2", "K1", "K2", "D1", "D2", "P1", "P2", "P3", "P4", "P5", "P6", "P7", "SB_Champ", "Runner_Up", "SB_Points"] full_stand = full_stand[full_stand_cols] #### VIZZES single = alt.selection_single() bar = alt.Chart(stand).mark_bar().encode( x = alt.X("Team", sort = alt.SortField(field="Total Points", order='descending'), title = "Team"), y = alt.Y("Total Points"), tooltip = alt.Tooltip(["Team", "Total Points", "Num Alive", "QBs Remaining", "Ks Remaining", "Ds Remaining", "Positions Remaining", "Dead"]), color = alt.Color("Num Alive", scale = alt.Scale(scheme = "lighttealblue", reverse=True)) #color=alt.condition(single, 'count()', alt.value('lightgray'), legend = None) ).properties(width=1200, height=600, title = "Points by Team").configure_axis( labelFontSize=30, titleFontSize=35 ).configure_title(fontSize= 45).add_selection(single) circ = alt.Chart(stand).mark_circle(size=100).encode( x=alt.Y("Total Points", scale=alt.Scale(domain=(min(stand["Total Points"]), max(stand["Total Points"])))), y=alt.Y("Num Alive", title="Players Remaining"),
def wsb_chart(
    data: pd.DataFrame,
    xvar: str = "start",
    x2var: str = "end",
    xvar_middle: str = "middle",
    yvar: str = "mantissa",
    vvar: str = "original",
    evar: str = "multiplier",
    xcat: str = "category",
    w: int = 400,
    h: int = 400,
    color_scheme: str = "orangered",
    title: str = "Width-Scale Bar Chart",
) -> alt.LayerChart:
    """Draw a width-scale bar chart: rects spanning ``xvar``..``x2var`` whose
    height is the mantissa and whose colour encodes the magnitude multiplier.

    :param data: frame with the start/end/middle/mantissa/multiplier columns
        named by the corresponding parameters.
    :param w: chart width in pixels; :param h: chart height in pixels.
    :returns: layered chart of the bars plus category labels below the axis.
    """
    _n_bars = len(data[xcat].unique())
    # Total horizontal padding is 10% of one bar's slot; each bar is then
    # shifted a little further right than the previous one.
    _padding_width = (w / _n_bars) * 0.1

    # NOTE(review): offsets scale with the positional index, so this assumes
    # `data` carries a default RangeIndex (0..n-1) — confirm at call sites.
    # Fixed: these columns were hard-coded as "start"/"end"/"middle" even
    # though xvar/x2var/xvar_middle are parameters (defaults are identical).
    data_with_padding = data.copy()
    data_with_padding[xvar] = (data_with_padding[xvar] + _padding_width / 4
                               + _padding_width / 2 * data_with_padding.index)
    data_with_padding[x2var] = (data_with_padding[x2var] + _padding_width / 4
                                + _padding_width / 2 * data_with_padding.index)
    data_with_padding[xvar_middle] = (
        (data_with_padding[x2var] - data_with_padding[xvar]) / 2
    ) + data_with_padding[xvar]

    # Clicking a legend entry highlights the bars with that multiplier.
    # Fixed: the selection field was hard-coded to "multiplier" instead of
    # using the `evar` parameter.
    selection = alt.selection_single(fields=[evar], bind="legend")

    base = alt.Chart(data_with_padding, width=w, height=h)
    bar = (
        base.mark_rect().encode(
            x=alt.X(
                f"{xvar}:Q",
                axis=alt.Axis(
                    titleY=(-0.5 + 22),
                    labels=False,
                    title=xcat.capitalize(),
                    grid=False,
                    values=data_with_padding[xvar_middle].to_list(),
                ),
            ),
            x2=alt.X2(f"{x2var}:Q"),
            y=alt.Y(
                f"{yvar}:Q",
                axis=alt.Axis(
                    title=yvar.capitalize(),
                    titleAngle=0,
                    titleAlign="left",
                    titleY=-5,
                    titleX=0,
                    # Render mantissa ticks as "3 ×" etc.
                    labelExpr="datum.value + ' ×'",
                ),
                scale=alt.Scale(domain=[0, 10]),
            ),
            color=alt.Color(
                f"{evar}:O",
                title="Magnitude Multiplier",
                legend=alt.Legend(labelExpr="'× ' + format(datum.value, ',')"),
                scale=alt.Scale(scheme=color_scheme),
            ),
            tooltip=[
                alt.Tooltip(f"{xcat}:N", title=xcat.capitalize()),
                alt.Tooltip(f"{vvar}:N", title="Value"),
                alt.Tooltip(f"{yvar}:Q", title=yvar.capitalize()),
                alt.Tooltip(f"{evar}:O", format=",", title="Magnitude Multiplier"),
            ],
            opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        ).add_selection(selection))

    # Category labels drawn just below the x axis. The y offset accounts for
    # Vega-Lite defaults: labelFontSize=10, tickSize=5, labelPadding=2,
    # translate=0.5.
    text = base.mark_text(align="center", baseline="middle", fontSize=10).encode(
        x=alt.X(f"{xvar_middle}:Q"),
        y=alt.value(h + (10 / 2) + 5 + 2 + 0.5),
        text=alt.Text(f"{xcat}:N"),
    )
    return alt.layer(bar, text, title=alt.TitleParams(title, anchor="start"))
from typing import Optional
import altair as alt

# Colour used for de-emphasised (non-selected) marks.
idle_color = "lightgray"

# Interval brush over the x (time) axis.
time_selection_brush = alt.selection_interval(encodings=["x"], name="time_select")
# Hover brush snapping to the nearest x value; an empty selection matches all.
horizon_hover_brush = alt.selection_single(on="mouseover", nearest=True, encodings=["x"], empty="all")
# Multi-selection over the "source" field (e.g. for a clickable legend).
source_selection_brush = alt.selection_multi(fields=["source"], name="source_select")

# Create selection brushes that choose the nearest point & selects based on x-value
nearest_x_hover_brush = alt.selection_single(nearest=True, on="mouseover", encodings=["x"], empty="none", name="nearest_x_hover")
nearest_x_select_brush = alt.selection_single(nearest=True, encodings=["x"], empty="all", name="nearest_x_select")


def horizon_selection_brush(init_belief_horizon=None) -> alt.MultiSelection:
    """Create a brush for selecting one or multiple horizons. :param init_belief_horizon: Optional initialisation value
def omm_chart(
    data: pd.DataFrame,
    xvar: str = "category",
    e_yvar: str = "exponent",
    m_yvar: str = "mantissa",
    v_var: str = "original",
    w: int = 400,
    h: int = 400,
    m_color: str = "#F3852A",
    e_color: str = "#707070",
    title: str = "Order of Magnitude Markers",
) -> alt.LayerChart:
    """Render an order-of-magnitude-markers chart.

    A wide grey bar per category shows the exponent; a narrow orange bar
    overlaid on it shows the mantissa. Clicking the "Part" legend dims the
    other component.

    :param data: frame with the category / exponent / mantissa / original
        columns named by the corresponding parameters.
    :returns: the two bar layers combined into one ``alt.LayerChart``.
    """
    category_count = len(data[xvar].unique())
    # Default `bandPaddingInner` = 0.1
    # More info: https://altair-viz.github.io/user_guide/configuration.html#scale-configuration
    slot = w / category_count
    exponent_width = slot - slot * 0.1
    mantissa_width = exponent_width / 5

    # Legend-bound selection on the synthetic "to_color" field; the shared
    # opacity condition dims whichever part is not selected.
    legend_pick = alt.selection_single(fields=["to_color"], bind="legend")
    dim_unpicked = alt.condition(legend_pick, alt.value(1), alt.value(0.2))

    chart_base = alt.Chart(data)

    exp_layer = chart_base.mark_bar(color=e_color, size=exponent_width).encode(
        x=alt.X(f"{xvar}:N", axis=alt.Axis(title=xvar.capitalize())),
        y=alt.Y(
            f"{e_yvar}:Q",
            axis=alt.Axis(title=None),
            scale=alt.Scale(domain=[0, 10]),
        ),
        tooltip=[
            alt.Tooltip(f"{xvar}:N", title=xvar.capitalize()),
            alt.Tooltip(f"{e_yvar}:Q", title=e_yvar.capitalize()),
            alt.Tooltip(f"{v_var}:N", title="Value"),
        ],
        color=alt.Color(
            "to_color:N",
            legend=alt.Legend(title="Part"),
            scale=alt.Scale(domain=["Exponent", "Mantissa"],
                            range=[e_color, m_color]),
        ),
        opacity=dim_unpicked,
    ).transform_calculate(to_color="'Exponent'")

    man_layer = chart_base.mark_bar(color=m_color, size=mantissa_width).encode(
        x=alt.X(f"{xvar}:N"),
        y=alt.Y(f"{m_yvar}:Q"),
        tooltip=[
            alt.Tooltip(f"{xvar}:N", title=xvar.capitalize()),
            alt.Tooltip(f"{m_yvar}:Q", title=m_yvar.capitalize()),
            alt.Tooltip(f"{v_var}:N", title="Value"),
        ],
        color=alt.Color("to_color:N"),
        opacity=dim_unpicked,
    ).transform_calculate(to_color="'Mantissa'")

    # Workaround — open issue: https://github.com/altair-viz/altair/issues/2009
    man_layer = man_layer.add_selection(alt.selection_single())

    layered = alt.layer(exp_layer, man_layer,
                        title=alt.TitleParams(title, anchor="start"))
    return layered.properties(width=w, height=h).add_selection(legend_pick)
=======================
This chart visualizes the age distribution of the US population over time.
It uses a slider widget that is bound to the year to visualize the age
distribution over time.
"""
# category: case studies
import altair as alt
from vega_datasets import data

source = data.population.url

# Fixed colour mapping for the two sexes.
pink_blue = alt.Scale(domain=('Male', 'Female'),
                      range=["steelblue", "salmon"])

# Slider bound to the decennial census year (1900-2000).
slider = alt.binding_range(min=1900, max=2000, step=10)
select_year = alt.selection_single(name="year", fields=['year'], bind=slider)

alt.Chart(source).mark_bar().encode(
    x=alt.X('sex:N', axis=alt.Axis(title=None)),
    y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))),
    color=alt.Color('sex:N', scale=pink_blue),
    column='age:O'  # one narrow facet per age bucket
).properties(
    width=20
).add_selection(
    select_year
).transform_calculate(
    # The source encodes sex as 1/2; map it to readable labels.
    "sex", alt.expr.if_(alt.datum.sex == 1, "Male", "Female")
).transform_filter(
    select_year
)
def viz_paired(df, input_city):
    """Slope chart comparing ethnic-category business shares between
    'New York, New York' and ``input_city``.

    :param df: DataFrame with columns 'city', 'category_name',
        'total_business_count', 'sample_rating', 'sample_review_count'.
    :param input_city: city name to pair against New York.
    :returns: interactive layered Altair chart (zoom/pan on y only).
    """
    # Mean per (city, category). Select only the numeric columns before
    # aggregating: averaging the string columns was silently dropped by old
    # pandas and raises a TypeError on pandas >= 2.0.
    cities = df.groupby(['city', 'category_name'])[[
        'total_business_count', 'sample_rating', 'sample_review_count'
    ]].mean()
    # City-wide totals; again restrict to the one numeric column actually
    # consumed below (the old string 'city' sum produced an unused column).
    cities = cities.join(df.groupby(['city'])[['total_business_count']].sum(),
                         on='city',
                         rsuffix='_by_city')
    cities['%_of_total'] = cities['total_business_count'] / cities[
        'total_business_count_by_city']

    ## Keep only New York plus the requested city.
    selected_cities = ['New York, New York', input_city]
    cities_pair = cities.reset_index()
    cities_pair = cities_pair[cities_pair['city'].isin(selected_cities)]

    # Circle layer: one point per category per city.
    viz_cities_slope_circles = alt.Chart(cities_pair).mark_point(
        size=40, filled=True, opacity=1).encode(
            x=alt.X('city:N',
                    sort=alt.Sort(selected_cities),
                    axis=alt.Axis(labelAngle=0)),
            y=alt.Y('%_of_total:Q',
                    axis=alt.Axis(format='.2p',
                                  title='Percent of Total Businesses')),
            color=alt.Color('category_name:N', legend=None),
            tooltip=[
                alt.Tooltip('category_name:N', title='Ethnic Category'),
                alt.Tooltip('%_of_total:Q', format='.2%',
                            title='Percentage of Total'),
                alt.Tooltip('total_business_count:Q', title='Count')
            ]).interactive(bind_x=False)

    # Hovering a line highlights and thickens it; clicking clears.
    selection_opacity = alt.selection_single(encodings=['y'],
                                             on='mouseover',
                                             clear="click",
                                             empty='none')
    condition_opacity = alt.condition(selection_opacity, alt.value(1),
                                      alt.value(0.2))
    condition_size = alt.condition(selection_opacity, alt.value(3),
                                   alt.value(2))

    # Line layer connecting each category's share across the two cities.
    viz_cities_slope_line = alt.Chart(cities_pair).mark_line().add_selection(
        selection_opacity).encode(
            x=alt.X('city:N',
                    sort=alt.Sort(selected_cities),
                    axis=alt.Axis(labelAngle=0)),
            y=alt.Y('%_of_total:Q',
                    axis=alt.Axis(format='.2p',
                                  title='Percent of Total Businesses')),
            color=alt.Color('category_name:N', legend=None),
            opacity=condition_opacity,
            size=condition_size,
            tooltip=[
                alt.Tooltip('category_name:N', title='Ethnic Category'),
                alt.Tooltip('%_of_total:Q', format='.2%',
                            title='Percentage of Total'),
                alt.Tooltip('total_business_count:Q', title='Count')
            ]).interactive(bind_x=False)

    viz_cities_slope = (viz_cities_slope_line +
                        viz_cities_slope_circles).properties(height=600)
    return viz_cities_slope