(alt.Chart(marketing).mark_circle(size=50).encode( x='Salary', y='AmountSpent', color='Age').transform_filter( alt.FieldLTPredicate(field='Salary', lt=120000)).properties(height=400, width=500)) """ The same filtering operation can also be done by using the datum module of Altair. It is simpler in terms of the syntax. The following code will create the same plot as above.""" from altair import datum (alt.Chart(marketing).mark_circle(size=50).encode( x='Salary', y='AmountSpent', color='Age').transform_filter(datum.Salary < 120000).properties(height=400, width=500)) """ Specify a condition for filtering based on a categorical column. For instance, the data points that belong to a set of discrete values can be filtered using the FieldOneOfPredicate method.""" (alt.Chart(marketing).mark_circle(size=50).encode( x='Salary', y='AmountSpent', color='Age').transform_filter( alt.FieldOneOfPredicate(field='Children', oneOf=[0, 2, 3])).properties(height=400, width=500)) """Two plots: One will the a scatter plot that consists of the salary and amount spent columns. The other one will be a bar plot that shows the average salary for the categories in the age column. The second plot will also be used as a filter for the first plot. Calculated the averages by applying the following transformation in the encode function `y='mean(Salary):Q'` """ selection = alt.selection_multi(fields=['Age']) first = (alt.Chart().mark_circle(size=50).encode( x='Salary', y='AmountSpent').transform_filter(selection).properties(height=300, width=500)) second = (alt.Chart().mark_bar().encode( x='Age:O',
#create chloropleth map for San Antonio and surronding counties sa_chart = alt.Chart(counties).mark_geoshape(stroke='grey').encode( color=alt.condition(selection, alt.value('black'), 'cases-per-100K:Q'), tooltip=[ '[properties][NAME]:N', 'cases:Q', 'population:Q', 'cases-per-100K:Q' ]).transform_lookup( lookup='[properties][NAME]', from_=alt.LookupData( cumulative_cases[cumulative_cases['date'] == date_value], 'county', ['cases', 'population', 'cases-per-100K'])).properties( width=800, height=600).add_selection(selection).transform_filter( alt.FieldOneOfPredicate(field='[properties][NAME]', oneOf=[ 'Bexar', 'Medina', 'Bandera', 'Kendall', 'Comal', 'Guadalupe', 'Wilson', 'Atascosa' ])) #create line chart for San Antonio and surrounding coounties sa_line = alt.Chart(cumulative_cases).mark_line().encode( x='date', y='cases-per-100K:Q', color='county', tooltip=['date', 'county', 'cases', 'population', 'cases-per-100K']).transform_filter( alt.FieldOneOfPredicate(field='county', oneOf=[ 'Bexar', 'Medina', 'Bandera', 'Kendall', 'Comal', 'Guadalupe',
# %%
# Heatmap of summed minutes per weekday (x) and week number (y), restricted
# to rows whose "type" is one of ``growth_types``.
_growth_heatmap_encodings = dict(
    x=alt.X("weekday:O", title="Day of Week"),
    y=alt.Y("pd_week_number:O", title="Week #"),
    color=alt.Color(
        "sum(minutes):Q",
        scale=alt.Scale(scheme="warmgreys"),
        legend=None,
    ),
    tooltip=[
        alt.Tooltip("monthdate(date):T", title="Date"),
        alt.Tooltip("sum(minutes):Q", title="Minutes"),
    ],
)

heatmap_weekday_growth = alt.Chart(heatmap_wrangled_filtered_grouped).mark_rect()
heatmap_weekday_growth = heatmap_weekday_growth.encode(**_growth_heatmap_encodings)
# Weekday-only restriction, currently disabled:
# heatmap_weekday_growth = heatmap_weekday_growth.transform_filter(
#     alt.FieldOneOfPredicate(field='weekday', oneOf=[1,2,3,4,5]))
heatmap_weekday_growth = heatmap_weekday_growth.transform_filter(
    alt.FieldOneOfPredicate(field="type", oneOf=growth_types)
)

# Disabled weekend counterpart of the chart above, kept for reference:
# heatmap_weekend_growth = alt.Chart(heatmap_wrangled_filtered_grouped).mark_rect().encode(
#     x= alt.X("weekday:O", title=None),
#     y= alt.Y("pd_week_number:O", axis=None),
#     color= alt.Color('sum(minutes):Q',scale=alt.Scale(scheme="warmgreys"), legend=None),
#     tooltip=[
#         alt.Tooltip('monthdate(date):T', title='Date'),
#         alt.Tooltip('sum(minutes):Q', title='Minutes')
#     ]
# ).transform_filter(
#     alt.FieldOneOfPredicate(field='weekday', oneOf=[6,7])
# ).transform_filter(
#     alt.FieldOneOfPredicate(field='type', oneOf = growth_types)
# )
]) chart_data = data.loc[((data.Year == 1990) | (data.Year == 2010)) & (data['Unit Code'] == 'T_CO2_EQVT') & (data.PowerCode == 'Thousands')] # altair doesn't allow to use custom str values for group bar charts descriptions = ['Energy', 'Manufacturing', 'Transport', 'Residential', 'Other'] fields = ['ENER_IND', 'ENER_MANUF', 'ENER_TRANS', 'ENER_OSECT', 'ENER_OTH'] for i in range(len(descriptions)): chart_data.replace(fields[i], descriptions[i], inplace=True, regex=True) chart = alt.Chart( chart_data, title="Greenhouse gas emissions by sector in 1990 and 2010").mark_bar( ).transform_filter(alt.FieldOneOfPredicate( field='VAR', oneOf=descriptions)).transform_calculate( mill_value='datum.Value/1000', ).encode( alt.Y('mill_value:Q', axis=alt.Axis(tickCount=5, domainWidth=0, title='CO2 equivalent (megatonnes)', titleFontWeight='lighter', titlePadding=10)), alt.Color('Year:O', legend=alt.Legend(title='Years', titleFontSize=12, labelFontSize=12, titlePadding=10, titleFontWeight='lighter')), alt.X('Year:O', axis=None), alt.Column('VAR:N',
# MPAA ratings accepted by the rating filter applied to ``base`` below.
ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R']
genres = [
    'Action',
    'Adventure',
    'Black Comedy',
    'Comedy',
    'Concert/Performance',
    'Documentary',
    'Drama',
    'Horror',
    'Musical',
    'Romantic Comedy',
    'Thriller/Suspense',
    'Western',
]

# Shared scatter plot, built one step at a time. The transforms are applied
# in this order: derived fields, positive-rating filter, MPAA-rating filter.
base = alt.Chart(movies, width=200, height=200).mark_point(filled=True)
base = base.transform_calculate(
    Rounded_IMDB_Rating="floor(datum.IMDB_Rating)",
    Hundred_Million_Production="datum.Production_Budget > 100000000.0 ? 100 : 10",
    Release_Year="year(datum.Release_Date)",
)
base = base.transform_filter(alt.datum.IMDB_Rating > 0)
base = base.transform_filter(
    alt.FieldOneOfPredicate(field='MPAA_Rating', oneOf=ratings)
)
base = base.encode(
    x=alt.X(
        'Worldwide_Gross:Q',
        scale=alt.Scale(domain=(100000, 10**9), clamp=True),
    ),
    y='IMDB_Rating:Q',
    tooltip="Title:N",
)

# A slider filter: a range input bound to a single selection on the derived
# Release_Year field, which then filters the base chart.
year_slider = alt.binding_range(min=1969, max=2018, step=1)
slider_selection = alt.selection_single(
    bind=year_slider,
    fields=['Release_Year'],
    name="Release Year_",
)
filter_year = base.add_selection(slider_selection)
filter_year = filter_year.transform_filter(slider_selection)
filter_year = filter_year.properties(title="Slider Filtering")
def error_span_durations(traceID):
    """Build stacked duration histograms for the errored spans of one trace.

    For every span of ``traceID`` whose ``error`` column equals ``True``, the
    durations of all loaded spans sharing that span's ``name`` are drawn as a
    histogram, the errored span of this trace is overlaid on top, and the
    chart title states what percentage of those durations are greater than
    the errored one.

    Args:
        traceID: Value compared against the ``traceId`` column of the frame
            returned by ``load_traces()``.

    Returns:
        The JSON spec (as produced by ``to_json()``) of the vertical
        concatenation of one layered chart per errored span.

    Raises:
        KeyError: If no span with the given trace id is present. The
            original code failed with a bare ``KeyError: 0`` in that case.
    """
    traces = load_traces()
    traces['error'] = traces['error'].fillna("false")

    trace_spans = traces.loc[traces['traceId'] == traceID]
    if trace_spans.empty:
        raise KeyError(f"no spans found for trace ID {traceID!r}")
    trace_spans = trace_spans.sort_values(by=['traceId', 'timestamp'],
                                          ascending=True).reset_index()

    # Element-wise comparison on a Series, so ``== True`` is intentional.
    errored_spans = trace_spans.loc[trace_spans['error'] == True]  # noqa: E712

    # Span names associated with errors; used to filter the histogram.
    error_span_names = errored_spans['name'].tolist()

    # Histogram over all data points of the layered subset, coloured by error.
    hist = alt.Chart().mark_bar().encode(
        y=alt.Y('count()', axis=alt.Axis()),
        x=alt.X('duration', axis=alt.Axis(title='Span Duration')),
        tooltip=['duration', 'error'],
        color=alt.Color('error',
                        title="Has Error",
                        scale=alt.Scale(range=['#D62728', '#1F77B4'])),
    ).transform_filter(
        alt.FieldOneOfPredicate(field='name', oneOf=error_span_names))

    # Overlay containing only the errored data points of the requested trace.
    error_hist = alt.Chart().mark_bar(color='red').encode(
        y=alt.Y('count()',
                axis=alt.Axis(title='Count of Spans',
                              format=".0f",
                              tickMinStep=1)),
        x=alt.X('duration', axis=alt.Axis(title='Span Duration')),
        tooltip=['duration', 'error'],
        color=alt.Color('traceId', scale=alt.Scale(range=['orange'])),
    ).interactive().transform_filter(
        (alt.datum.error == True)  # noqa: E712
    ).transform_filter((alt.datum.traceId == traceID))

    summaries = []
    charts = []
    for name, duration in zip(errored_spans['name'],
                              errored_spans['duration']):
        subset = traces.loc[traces['name'] == name]
        # kind='weak' counts durations <= the errored one, so 100 minus that
        # score is the share of durations strictly greater than it.
        score = stats.percentileofscore(subset['duration'],
                                        duration,
                                        kind='weak')
        percentage = "%.1f" % (100 - score)
        summary = f"{percentage}% of all {name} span durations are greater than the {name} span that errored in Trace ID: {traceID}."
        summaries.append(summary)
        chart = alt.layer(hist, error_hist, data=subset).properties(
            title=summary).resolve_scale(color='independent')
        charts.append(chart)

    stackCharts = alt.vconcat(*charts)
    return stackCharts.to_json()
color=alt.Color('variable_posneg', title='Impacto'), tooltip=[ alt.Tooltip('mean(Mean_conditions):Q', title='Media', format=',.2f') ]).transform_filter( (datum.variable_category == 'Economía no básica')).transform_filter( alt.FieldOneOfPredicate( field='variable', oneOf=[ 'Vacaciones No', 'Vacaciones_Sí', 'Alimentación No', 'Alimentación Sí', '"Colchón" económico Sí', '"Colchón" económico No', 'Alto impacto coste vivienda', 'Bajo impacto coste vivienda', 'Impacto medio coste vivienda', 'Calefacción No', 'Calefacción Sí', 'Riesgo de pobreza No', 'Riesgo de pobreza Sí', 'Carencia Material Severa No', 'Carencia material severa', 'Enfermedades Crónicas No', 'Enfermedades Crónicas Sí', 'Limitaciones físicas No', 'Limitaciones físicas leves Sí', 'Limitaciones físicas graves Sí', 'Compra de ropa No', 'Compra de ropa Sí', 'Compra de zapatos No', 'Compra de zapatos Sí', 'Ocio con amigos No', 'Ocio con amigos Sí', 'Ocio en general No', 'Ocio en general Sí', 'Gasto en uno mismo No', 'Gasto en uno mismo Sí', 'Acceso a internet No', 'Acceso a internet Sí' ])).properties(height=300, width=300, title='Variables económicas hedonistas') x2 = alt.Chart(df_vis_nac).mark_bar().encode( x=alt.X('variable:O', title=None),
color=alt.Color('primary:N'), # column = alt.Column('date:T', timeUnit='year') ) color = alt.Chart(df).mark_text(baseline='bottom').encode( x=alt.X('primary:N', sort=alt.Sort(field='count', op='mean', order='descending')), y=alt.Y('count:Q'), text='count:Q', # column = alt.Column('date:T', timeUnit='year') ) chrts.append((chart + color).properties( width=280, title='Year ' + str(years[y])).transform_calculate( year='year(datum.date)').transform_filter( alt.FieldOneOfPredicate(field='year', oneOf=[int( years[y])])).transform_aggregate(count='count()', groupby=['primary'])) alt.hconcat(*chrts) base.mark_trail().encode(x=alt.X('date:T', timeUnit='year'), y=alt.Y('primary:N', sort=alt.Sort(op='count', order='descending')), detail=alt.Detail('primary'), size=alt.Size('count()', scale=alt.Scale(range=[1, 25]))) """# Чернетка # Task 1 ## V4
def plot_time(query_string, highlight_country, year_range):
    """Create a time series (spaghetti) plot of country-wise obesity rates.

    Draws one line per country in light gray and overlays the highlighted
    countries in colour with a legend.

    Args:
        query_string (str): Forwarded unchanged to ``he.make_rate_data`` as
            the filter for the data.
        highlight_country (str or list of str): Country name(s) to highlight.
            A single string is wrapped into a one-element list.
        year_range (sequence): Two values; only used to build the subtitle
            ``"<first>-<second>"``.

    Returns:
        altair chart: The all-countries chart layered with the
        highlighted-countries chart.
    """
    # Filter data
    ob_yr = he.make_rate_data(["country", "year"], ["obese"], query_string)

    # Create labels
    title_label = "World Obesity"
    sub_label = str(year_range[0]) + "-" + str(year_range[1])

    # Normalise a single country name to a list.
    if isinstance(highlight_country, str):
        highlight_country = [highlight_country]

    # Take an explicit copy so that adding a column does not write into a
    # slice of ``ob_yr`` (avoids pandas' SettingWithCopyWarning).
    highlighted_data = ob_yr[ob_yr["country"].isin(highlight_country)].copy()
    highlighted_data["highlighted"] = [
        country if country in highlight_country else "other"
        for country in highlighted_data["country"]
    ]

    def is_highlighted():
        # Fresh predicate per use: true for rows of a highlighted country.
        return alt.Predicate(
            alt.FieldOneOfPredicate(field="country", oneOf=highlight_country))

    # All countries: highlighted ones coloured and opaque, the rest faded.
    country_time_chart = (
        alt.Chart(
            ob_yr,
            title=alt.TitleParams(text=title_label, subtitle=sub_label),
        )
        .mark_line()
        .encode(
            x=alt.X(
                "year:O",
                scale=alt.Scale(zero=False),
                title="Years",
                axis=alt.Axis(grid=False),
            ),
            y=alt.Y(
                "obese:Q",
                title="Obesity Rate",
                axis=alt.Axis(format="%"),
            ),
            color=alt.condition(
                is_highlighted(),
                "country",
                alt.value("lightgray"),
            ),
            opacity=alt.condition(
                is_highlighted(),
                alt.value(1),
                alt.value(0.2),
            ),
            tooltip="country",
        )
        .properties(width=450, height=300)
        .interactive()
    )

    # Highlighted countries only; this layer carries the legend.
    highlighted_time_chart = (
        alt.Chart(highlighted_data)
        .mark_line()
        .encode(
            x=alt.X(
                "year:O",
                scale=alt.Scale(zero=False),
                title="Years",
                axis=alt.Axis(grid=False),
            ),
            y=alt.Y(
                "obese:Q",
                title="Obesity Rate",
                axis=alt.Axis(format="%"),
            ),
            color=alt.Color(
                "highlighted",
                legend=alt.Legend(title="Countries",
                                  values=highlight_country),
            ),
            tooltip="country",
        )
    )

    return country_time_chart + highlighted_time_chart
def generate_line_chart(
    years: list,
    width: int = 700,
    height: int = 500,
) -> alt.Chart:
    """Build an interactive price-over-time line chart with a clickable year legend.

    Reads the module-level ``calendar_pd`` frame, keeps the first row per
    ``date``, drops rows whose price is missing or not below 5000, and draws
    one line per year listed in ``years``. Clicking a year in the side legend
    emphasises that year's line; hovering shows a guide rule, a point and a
    text label for the nearest x value of the clicked year(s).

    Args:
        years: Years to plot; ``years[0]`` is the initially selected year.
        width: Width passed to the layered chart's properties.
        height: Height passed to the layered chart's properties.

    Returns:
        The layered line chart horizontally concatenated with the legend.
    """
    # Data preparation: one row per date, plausible non-missing prices only.
    prices = calendar_pd.drop_duplicates(subset=['date'], keep='first')
    prices = prices[prices.price < 5000]
    prices = prices[prices.price.notna()]

    # Year selection driven by clicks on the legend chart.
    click = alt.selection_multi(
        fields=['year'],  # select by year value
        empty='none',  # empty selection includes no data points
        init=[{"year": years[0]}],
    )

    # Hover selection that picks the x value nearest to the cursor.
    hover = alt.selection_single(
        encodings=['x'],  # limit selection to x-axis value
        on='mouseover',  # select on mouseover events
        nearest=True,  # select data point nearest the cursor
        empty='none',  # empty selection includes no data points
    )

    def requested_years():
        # Fresh predicate per chart: keep only rows of the requested years.
        return alt.FieldOneOfPredicate(field='year', oneOf=years)

    # Stand-in legend: one point per year, gray unless clicked.
    legend_color = alt.condition(
        click,
        alt.Color('year:N', legend=None),
        alt.value('lightgray'),
    )
    legend = alt.Chart(prices).mark_point().encode(
        y=alt.Y('year:N', axis=alt.Axis(orient='right')),
        color=legend_color,
    )
    legend = legend.add_selection(click).transform_filter(requested_years())

    # Base line chart of prices, one colour per year.
    base = alt.Chart(prices).mark_line().encode(
        x=alt.X('partial_date:T'),
        y=alt.Y('price:Q'),
        color=alt.Color('year:N', legend=None),
    ).transform_filter(requested_years())

    label_expr = '"price:$" + datum.price + " date:" + datum.clean_date'

    # Lines: fully opaque for clicked years, faded otherwise.
    lines = base.encode(
        opacity=alt.condition(click, alt.value(1), alt.value(0.1)))

    # Vertical guide rule at the hovered x position.
    guide = alt.Chart(prices).mark_rule(color='#aaa').encode(
        x='partial_date:T').transform_filter(hover)

    # Circle marks for the hovered point of clicked years; others hidden.
    markers = base.mark_circle().encode(
        opacity=alt.condition(hover, alt.value(1), alt.value(0)))
    markers = markers.add_selection(hover).transform_filter(click)

    # White-stroked text underneath provides a legible background for labels.
    label_halo = base.mark_text(
        align='left', dx=5, dy=-5, stroke='white', strokeWidth=2,
    ).encode(text=alt.Text('label:N'))
    label_halo = label_halo.transform_filter(hover).transform_filter(click)
    label_halo = label_halo.transform_calculate(label=label_expr)

    # The actual text labels drawn on top of the halo.
    label_text = base.mark_text(align='left', dx=5, dy=-5).encode(
        text='label:N')
    label_text = label_text.transform_filter(hover).transform_filter(click)
    label_text = label_text.transform_calculate(label=label_expr)

    layered = alt.layer(lines, guide, markers, label_halo, label_text)
    layered = layered.properties(width=width,
                                 height=height,
                                 title="price over time").interactive()

    final_chart = layered | legend
    return final_chart
def plot_times_series_prediction(df_,
                                 prediciton_mean_,
                                 prediction_ci_=None,
                                 width=800,
                                 height=150):
    """Plot a time series with its prediction and, optionally, a confidence band.

    Two vertically stacked charts are returned: the full series, on which an
    x-interval brush can be drawn, and a detail chart with points and
    tooltips that is filtered by that brush.

    Args:
        df_: DataFrame with the observed series, dates in the index. Only the
            variable named ``'y'`` is drawn, so the column is assumed to be
            called ``'y'`` - TODO confirm against callers.
        prediciton_mean_: Model output with the prediction mean, dates in the
            index. A Series of any name is accepted and plotted as
            ``'prediction'``; any other input is passed to ``pd.DataFrame``
            with ``columns=['prediction']``.
        prediction_ci_: Optional confidence intervals, dates in the index.
            The band uses the variables ``'lower y'`` and ``'upper y'``.
        width: Width of each chart.
        height: Height of each chart.

    Returns:
        The overview chart vertically concatenated with the detail chart.
    """
    has_band = prediction_ci_ is not None

    # Build the frame to draw. The inputs are never modified: every step
    # below (constructor, merge, melt) produces a new object.
    if isinstance(prediciton_mean_, pd.Series):
        # pd.DataFrame(series, columns=[...]) would yield an all-NaN column
        # whenever the Series carries a different name, so rename instead.
        prediction_df = prediciton_mean_.rename('prediction').to_frame()
    else:
        prediction_df = pd.DataFrame(prediciton_mean_, columns=['prediction'])

    if has_band:
        prediction_df = pd.DataFrame(prediction_ci_).merge(
            prediction_df, left_index=True, right_index=True)

    df_plot = df_.merge(prediction_df,
                        left_index=True,
                        right_index=True,
                        how='outer')
    # Force the index label so the column produced by reset_index is always
    # called 'index', even when the incoming frames have a named index.
    df_plot = df_plot.rename_axis('index').reset_index()
    df_plot = df_plot.melt(id_vars=['index'])
    df_plot = df_plot.dropna()

    # Colour scheme shared by both charts.
    scheme = alt.Scale(scheme='tableau10')

    # Interval brush along x, used to filter the detail chart.
    brush = alt.selection(type="interval", encodings=["x"])

    def series_color():
        # Fresh colour encoding with the common legend styling.
        return alt.Color('variable',
                         scale=scheme,
                         legend=alt.Legend(title='Leyenda',
                                           symbolSize=30,
                                           symbolType='circle',
                                           symbolStrokeWidth=5,
                                           orient="top",
                                           direction="horizontal",
                                           labelFont='Calibri',
                                           labelFontSize=12,
                                           titleFont='Calibri',
                                           titleFontSize=15,
                                           titleFontWeight=900))

    def series_only():
        # Fresh predicate keeping the observed and predicted series.
        return alt.FieldOneOfPredicate(field='variable',
                                       oneOf=['y', 'prediction'])

    # Overview chart of the full series (the brush is attached below).
    main_times_series_plot = alt.Chart(df_plot).mark_line(
        point=False, opacity=0.7).encode(
            x=alt.X('index:T'),
            y=alt.Y('value:Q', axis=alt.Axis(grid=True)),
            color=series_color(),
        ).properties(
            width=width,
            height=height,
            title='Serie completa para el filtrado',
        ).transform_filter(series_only())

    # Detail chart with points and tooltips.
    second_times_series_plot = alt.Chart(df_plot).mark_line(
        point=True, opacity=0.7).encode(
            x=alt.X('index:T'),
            y=alt.Y('value:Q', axis=alt.Axis(grid=True)),
            color=series_color(),
            tooltip=['index:T', 'value:Q', 'variable'],
        ).properties(
            width=width,
            height=height,
            title='Serie ampliada',
        ).transform_filter(series_only())

    if not has_band:
        t1 = main_times_series_plot.add_selection(brush)
        t2 = second_times_series_plot.transform_filter(brush)
        return t1 & t2

    # Confidence band built from the lower/upper interval variables.
    band = alt.Chart(df_plot).mark_errorband(
        extent='ci', color='red', opacity=0.1).encode(
            x=alt.X('index:T'),
            y=alt.Y('value:Q'),
        ).transform_filter(
            alt.FieldOneOfPredicate(field='variable',
                                    oneOf=['lower y', 'upper y']))

    t1 = band + main_times_series_plot.add_selection(brush)
    t2 = (band + second_times_series_plot).transform_filter(brush)
    return t1 & t2
def app():
    """Render the "Tobacco Control" Streamlit page.

    The page is drawn top to bottom in three containers:

    1. A choropleth of the selected control measure for the selected year,
       followed by a normalized stacked bar used as a legend.
    2. A small-multiples bar chart (row per country, column per year) of the
       selected measure for the countries picked in a multiselect.
    3. A scatter plot of the change in the measure against the change in
       deaths between 2008 and 2016, with a regression line and two brushable
       histograms.

    The control-policy and deaths CSVs are referenced by URL in the Altair
    charts. The control-policy CSV is additionally read with pandas, but
    only to list the available countries.
    """
    ####### Datasets
    control_dataset = 'https://raw.githubusercontent.com/JulioCandela1993/VisualAnalytics/master/data/control_policy.csv'
    deaths_dataset = 'https://raw.githubusercontent.com/JulioCandela1993/VisualAnalytics/master/data/deaths.csv'

    ####### Dataframes
    # Only used further down to build the list of selectable countries.
    control_df = pd.read_csv(control_dataset)

    ####### Dashboard
    st.title("Tobacco Control")
    st.markdown(''' The following analysis is based on the evaluation made by World Health Organization (WHO) to country policies against Tobacco. A score from 1 to 5 is assigned depending on the intensity of a country to deal with Tobacco issues being 1 the worst and 5 the best ''')

    ####### Control Measures given by WHO
    control_metrics = ["Monitor", "Protect from tobacco smoke", "Offer help to quit tobacco use", "Warn about the dangers of tobacco", "Enforce bans on tobacco advertising", "Raise taxes on tobacco",
                       #"Anti-tobacco mass media campaigns"
                       ]

    # Main Selector of Control Measures
    cols = st.selectbox('Select control measure: ', control_metrics)
    # NOTE(review): the names below are only bound when the selection is one
    # of control_metrics; everything after this point relies on them.
    if cols in control_metrics:
        # Altair shorthand "<column>:Q" for the selected measure.
        metric_to_show_in_covid_Layer = cols +":Q"
        metric_name = cols

    years = ['2008', '2010', '2012', '2014', '2016', '2018']
    # NOTE(review): columns_year is not read again in this function.
    columns_year = [metric_name+" "+str(year) for year in years]
    # Placeholder field names "d<year>" that are folded into rows for the map.
    columns = ["d" +str(year) for year in years]

    container_map = st.beta_container()

    ####### Map Visualization
    with container_map:
        st.header("How are countries controlling Tobacco consumption?")
        #st.header('"'A global view of the implementation of the policy """ around the world'"')
        st.markdown(''' In the folling map, we can identify the intensity of a specific control policy for each country. We can also see the evolution of these policies from 2008 to 2018 ''')

        # Year Selector
        select_year_list = st.selectbox('Select year: ', years)#st.slider('Select year: ', 2008, 2018, 2008, step = 2)
        select_year = int(select_year_list)

        # Map Topology
        url_topojson = 'https://raw.githubusercontent.com/JulioCandela1993/VisualAnalytics/master/world-countries.json'
        data_topojson_remote = alt.topo_feature(url=url_topojson, feature='countries1')

        ### Map Chart
        # Background layer: every country shape, with the measure looked up
        # by country name only (no year filter on this layer).
        map_geojson = alt.Chart(data_topojson_remote).mark_geoshape(
            stroke="black",
            strokeWidth=1,
            #fill='lightgray'
        ).encode(
            color=alt.Color(metric_to_show_in_covid_Layer),
        ).transform_lookup(
            lookup="properties.name",
            from_=alt.LookupData(control_dataset, "Country", [metric_name,"Year"])
        ).properties(
            width=700,
            height=500
        )

        # Foreground layer: each shape gets the constant fields d2008..d2018,
        # which are folded into one row per year. "<name><year>" is then used
        # as the lookup key into the control dataset's "ID" column and the
        # rows are filtered to the selected year.
        # presumably "ID" is the country name followed by the year - verify
        # against the CSV.
        choro = alt.Chart(data_topojson_remote, title = 'Implementation of the policy "' +metric_name+'" around the world').mark_geoshape(
            stroke='black'
        ).encode(
            color=alt.Color(metric_to_show_in_covid_Layer,
                            scale=alt.Scale(range=['#ffe8dd','#ef4f4f']),
                            legend=None),
            tooltip=[
                alt.Tooltip("properties.name:O", title="Country"),
                alt.Tooltip(metric_to_show_in_covid_Layer, title=metric_name),
                alt.Tooltip("year:Q", title="Year"),
            ],
        ).transform_calculate(
            d2008 = "1",
            d2010 = "1",
            d2012 = "1",
            d2014 = "1",
            d2016 = "1",
            d2018 = "1"
        ).transform_fold(
            columns, as_=['year', 'metric']
        ).transform_calculate(
            yearQ = 'replace(datum.year,"d","")'
        ).transform_calculate(
            key_val = 'datum.properties.name + datum.yearQ'
        ).transform_lookup(
            lookup="key_val",
            from_=alt.LookupData(control_dataset, "ID", [metric_name,"Year"])
        ).transform_calculate(
            year='parseInt(datum.Year)',
        ).transform_filter(
            alt.FieldEqualPredicate(field='year', equal=select_year)
        )

        st.altair_chart(map_geojson + choro)

        ## Qualification array
        # Maps the numeric score 1..5 to a display label.
        qualifications = pd.DataFrame.from_dict({
            "keys": [1,2,3,4,5],
            "category":["1.Very Bad", "2.Bad", "3.Medium", "4.Good", "5.Perfect"]
        })

        ## Legend Chart
        ##### Data Transformations
        # For the selected year: count rows per score, attach the category
        # label, and compute each score's share of the total (pct).
        legend_info = alt.Chart(control_dataset).transform_joinaggregate(
            num_countries='count(*)',
        ).transform_filter(
            alt.FieldEqualPredicate(field='Year', equal=select_year)
        ).transform_lookup(
            lookup=metric_name,
            from_=alt.LookupData(qualifications, "keys", ["category"])
        ).transform_aggregate(
            count='count()',
            groupby=[metric_name,"category"]
        ).transform_joinaggregate(
            total='sum(count)'
        ).transform_calculate(
            pct='datum.count / datum.total'
        )

        # Normalized stacked bar, one segment per score.
        legend_bar = legend_info.mark_bar().encode(
            x=alt.X('pct:Q', stack="normalize", sort=alt.SortField(metric_to_show_in_covid_Layer), title = None, axis = None),
            color=alt.Color(metric_to_show_in_covid_Layer, scale=alt.Scale(range=['#ffe8dd','#ef4f4f']) ,legend = None),
            tooltip=[
                alt.Tooltip(metric_to_show_in_covid_Layer, title=metric_name)
            ],
        )

        # Percentage text per segment. The colour channel uses an all-black
        # range and no legend, so the text is black for every score.
        legend_value = legend_info.mark_text(dx = -11, align='center', baseline='middle', color='black', fontWeight = "bold").encode(
            x=alt.X('pct:Q', sort=alt.SortField(metric_to_show_in_covid_Layer), stack='normalize', axis = None),
            #detail = metric_to_show_in_covid_Layer,
            color=alt.Color(metric_name +":O", scale=alt.Scale(range=['#000000','#000000']) ,legend = None),
            text=alt.Text('pct:Q',format='.0%')
        )

        # Category label per segment, rotated by 90 degrees.
        legend_category = legend_info.mark_text(dx = 10, dy = 10, align='left', baseline='middle', color='black', angle = 90, fontWeight = "bold").encode(
            x=alt.X('pct:Q', sort=alt.SortField(metric_to_show_in_covid_Layer), stack='normalize', axis = None),
            #detail = metric_to_show_in_covid_Layer,
            color=alt.Color(metric_name +":O", scale=alt.Scale(range=['#000000','#000000']) ,legend = None),
            #text=alt.Text(metric_to_show_in_covid_Layer)
            text=alt.Text("category:N")
        )

        legend_chart = (legend_bar + legend_value + legend_category).properties(
            width=700,
            height=100,
            title = metric_name
        ).configure_title(align = "left"
        ).configure_view(
            strokeWidth=0
        )

        st.altair_chart(legend_chart)

    ##### Evolution of policy per selected countries
    container_policycountry = st.beta_container()
    with container_policycountry:
        st.header("Evolution of the policy per country")
        st.markdown(''' In addition, we can evaluate and compare the evolution of the selected policy among different countries of our interest: ''')

        ## Selector of countries
        countries = st.multiselect('Select countries to plot',
                                   control_df.groupby('Country').count().reset_index()['Country'].tolist(),
                                   default=['China', 'India', 'France'])
        st.markdown(''' ''')

        xscale_barchart = alt.Scale(domain=(0, 5))

        ## Comparison chart of policies per country
        # The selected measure is folded into a generic "value" field, then
        # rows are limited to the chosen countries and to years 2008-2018.
        barchart_country = alt.Chart(control_dataset,width=90,height=20,
                                     title = 'Evolution of Policy "' + metric_name + '" per selected countries'
        ).mark_bar(
            size = 20
        ).encode(
            alt.X('value:Q', scale = xscale_barchart, title = "", axis = alt.Axis(grid = False)),
            alt.Row('Country:N', title = "", spacing = 5, header = alt.Header(labelAngle = 0, labelAlign = "left",labelLimit = 100)),
            alt.Column("Year:O", title = "", spacing = 10),
            color=alt.Color("value:Q",
                            scale=alt.Scale(domain=[1,4], range=['#ffe8dd','#ef4f4f']),
                            legend=None),
            tooltip=[
                alt.Tooltip("Country:N", title="Country"),
                alt.Tooltip(metric_to_show_in_covid_Layer, title=metric_name),
                alt.Tooltip("Year:O", title="Year"),
            ]
        ).transform_fold(
            [metric_name], as_ = ['Measurement_type', 'value']
        ).transform_filter(
            alt.FieldOneOfPredicate(field="Country", oneOf=countries)
        ).transform_filter(
            {'field': 'Year', 'range': [2008,2018]}
        ).configure_title(align = "center", anchor = "middle", dy = -10)

        st.altair_chart(barchart_country)
        # The same legend chart is rendered a second time below the bars.
        st.altair_chart(legend_chart)

    ####### Scatterplot control policy vs deaths
    def render_latex(formula, fontsize=10, dpi=100):
        """Render a LaTeX formula as an image in Streamlit.

        The formula is drawn as matplotlib math text, the figure is shrunk
        to the text's bounding box, saved to an in-memory JPG and passed to
        ``st.image``.
        """
        fig = plt.figure()
        text = fig.text(0, 0, '$%s$' % formula, fontsize=fontsize)

        fig.savefig(BytesIO(), dpi=dpi)  # triggers rendering

        # Resize the figure to the rendered text, plus a 0.05 inch margin.
        bbox = text.get_window_extent()
        width, height = bbox.size / float(dpi) + 0.05
        fig.set_size_inches((width, height))

        # Shift the text by the bounding box's lower edge, expressed as a
        # fraction of the new figure height.
        dy = (bbox.ymin / float(dpi)) / height
        text.set_position((0, -dy))

        buffer = BytesIO()
        fig.savefig(buffer, dpi=dpi, format='jpg')
        plt.close(fig)

        st.image(buffer)

    container_correlation = st.beta_container()
    with container_correlation:
        st.header("Are the policies having an impact in the deaths by Smoking?")
        st.markdown(''' Countries have implemented different control policies against Tobacco which have been measured by WHO from 2008 until 2018. During this period, some countries have strengthen their policies; however, we don't know the real impact of them. As a consequence, the following visualization measures the correlation of the change in control policies with respect to the change in deaths by Smoking. The definitions of % of change are the following: ''')

        # NOTE(review): the formulas shown here are plain 2016/2008 ratios,
        # while the charts below plot ((2016 / 2008) - 1) * 100.
        render_latex(r'\%\ change\ in\ '+metric_name+r'\ =\ \frac{'+metric_name+r'\ in\ 2016}{'+metric_name+r'\ in\ 2008}')
        render_latex(r'\%\ change\ in\ Deaths\ by\ Smoking\ =\ \frac{Deaths\ by\ Smoking\ in\ 2016}{Deaths\ by\ Smoking\ in\ 2008}')

        st.markdown(''' The user can also select brush the histograms in order to filter the points and evaluate the slope of the regression in more detail (with groups that increased more or less in control policies, for example) ''')

        brush = alt.selection_interval()

        ## Data Transformations
        # Join deaths onto the control rows by "ID", keep the 2008 and 2016
        # values in separate fields (0 for other years), sum them per country
        # and derive the percentage change of deaths and of the measure.
        base_scatter = alt.Chart(control_dataset).transform_lookup(
            lookup="ID",
            from_=alt.LookupData(deaths_dataset, "ID", ["deaths","Year"])
        ).transform_calculate(
            deaths='parseFloat(datum.deaths)',
            year='parseInt(datum.Year)',
            metric = alt.datum[metric_name]
        ).transform_calculate(
            deaths_2016='datum.year==2016?datum.deaths:0',
            deaths_2008='datum.year==2008?datum.deaths:0',
            metric_2016='datum.year==2016?datum.metric:0',
            metric_2008='datum.year==2008?datum.metric:0',
            year='parseInt(datum.Year)',
            sizepoint = '2'
        ).transform_aggregate(
            deaths_2016='sum(deaths_2016)',
            metric_2016='sum(metric_2016)',
            deaths_2008='sum(deaths_2008)',
            metric_2008='sum(metric_2008)',
            groupby=["Country"]
        ).transform_calculate(
            incr_ratio_deaths='((datum.deaths_2016/datum.deaths_2008)-1)*100',
            incr_ratio_metric='((datum.metric_2016/datum.metric_2008)-1)*100',
        )

        xscale = alt.Scale(domain=(-100, 300))
        yscale = alt.Scale(domain=(-100, 200))

        ## Scatterplot of changes in Policy and changes in deaths
        points_scatter = base_scatter.mark_point(size=50, stroke="#ef4f4f").encode(
            alt.X('incr_ratio_metric:Q', scale = xscale, title = '% change of efforts in ' + metric_name + ' from 2008 to 2016'),
            alt.Y('incr_ratio_deaths:Q', scale=yscale, title = '% change in deaths from 2008 to 2016'),
            #color=alt.condition(brush, alt.value('blue'), alt.value('lightgray')),
            #opacity=alt.condition(brush, alt.value(0.75), alt.value(0.05)),
            tooltip=[
                alt.Tooltip("deaths_2016:Q", title="Deaths in 2016"),
                alt.Tooltip("deaths_2008:Q", title="Deaths in 2008"),
                alt.Tooltip("Country:N", title="Country"),
            ],
        ).properties(
            width=450,
            height=450
        ).transform_filter(brush)

        # Linear regression over the brushed points: the policy change is
        # the "on" field and the deaths change is the "regression" field.
        regression_scatter = points_scatter.transform_regression(
            on='incr_ratio_metric', regression='incr_ratio_deaths', method = 'linear'
        ).mark_line(color='#19456b')

        scatter_final = (points_scatter + regression_scatter)

        # Histogram of changes in policy
        top_hist = base_scatter.mark_area(line=True, opacity=0.3).encode(
            alt.X("incr_ratio_metric:Q",
                  bin=alt.Bin(maxbins=30, extent=xscale.domain),
                  title=''
                  ),
            alt.Y('count()', title=''),
            color=alt.value("#ef4f4f")
        ).add_selection(
            brush
        ).properties(width=450 , height=100, title = "Distribution of % change in policy")

        # Histogram of changes in deaths
        right_hist = base_scatter.mark_area(line=True, opacity=0.3).encode(
            alt.Y('incr_ratio_deaths:Q',
                  bin=alt.Bin(maxbins=20, extent=yscale.domain),
                  title='',
                  ),
            alt.X('count()', title=''),
            color=alt.value("#ef4f4f")
        ).add_selection(
            brush
        ).properties(width=110, height=450,
                     title=alt.TitleParams(text="Distribution of % change in deaths", align="center", angle = 90, orient = 'right')
                     )

        # Layout: policy histogram on top, scatter with the deaths histogram
        # to its right underneath.
        st.altair_chart((top_hist & (scatter_final |right_hist )
                         ).properties(title = "Correlation between % change in policy and % change in deaths"
                         ).configure_title(align = "center", anchor = "middle", dy = -10))
def graph_compare_cea2034(df, graph_params, speaker1, speaker2):
    """Overlay the CEA2034 curves of two speakers in one interactive chart.

    The first speaker is drawn with solid lines and the second with dashed
    lines. Each speaker layer is bound to and filtered by its own selection
    as returned by ``build_selections``.

    Args:
        df: Data with at least the columns ``Freq``, ``dB`` and
            ``Measurements`` used by the encodings below.
        graph_params: Currently unused; the axis domains are hard-coded
            (see the TODO below).
        speaker1: Forwarded to ``build_selections``.
        speaker2: Forwarded to ``build_selections``.

    Returns:
        The layered, interactive Altair chart.

    Note:
        Relies on a ``nearest`` selection defined outside this function.
    """
    selection1, selection2, selectorsMeasurements, scales = build_selections(
        df, speaker1, speaker2)

    # TODO(move to parameters)
    x_axis = alt.X('Freq:Q',
                   scale=alt.Scale(type="log", domain=[20, 20000], nice=False))
    y_axis = alt.Y('dB:Q', scale=alt.Scale(zero=False, domain=[-40, 10]))
    di_axis = alt.Y('dB:Q',
                    scale=alt.Scale(zero=False, domain=[-10, 40], nice=False))
    color = alt.Color('Measurements', type='nominal', sort=None)
    opacity = alt.condition(selectorsMeasurements, alt.value(1),
                            alt.value(0.2))

    def only(measurements):
        # Fresh predicate keeping the given Measurements values.
        return alt.FieldOneOfPredicate(field='Measurements',
                                       oneOf=measurements)

    def hover_points(curves):
        # Circles that only become visible at the nearest hovered frequency.
        return curves.mark_circle(size=100).encode(
            opacity=alt.condition(nearest, alt.value(1), alt.value(0)),
            tooltip=['Measurements', 'Freq', 'dB'])

    # Sound pressure curves.
    line = alt.Chart(df).transform_filter(
        only([
            'On Axis', 'Listening Window', 'Early Reflections', 'Sound Power'
        ])).encode(x=x_axis, y=y_axis, color=color, opacity=opacity)
    points = hover_points(line)

    # Directivity index curves, on their own y axis.
    di = alt.Chart(df).transform_filter(
        only(['Early Reflections DI', 'Sound Power DI'])).encode(
            x=x_axis, y=di_axis, color=color, opacity=opacity)
    points_di = hover_points(di)

    # Solid rendering, used for the first speaker.
    spin_full = alt.layer(
        points + line.mark_line(),
        points_di + di.mark_line(clip=True),
    ).resolve_scale(y='independent').properties(width=600, height=300)

    # Dashed rendering, used for the second speaker.
    spin_dash = alt.layer(
        points + line.mark_line(strokeDash=[4, 2]),
        points_di + di.mark_line(clip=True, strokeDash=[4, 2]),
    ).resolve_scale(y='independent').properties(width=600, height=300)

    line1 = spin_full.add_selection(selection1).transform_filter(selection1)
    line2 = spin_dash.add_selection(selection2).transform_filter(selection2)

    # Vertical rule at the hovered frequency.
    rules = alt.Chart(df).mark_rule(color='gray').encode(
        x='Freq:Q').transform_filter(nearest)

    layers = alt.layer(line2, line1, rules)
    layers = layers.add_selection(selectorsMeasurements)
    layers = layers.add_selection(scales)
    layers = layers.add_selection(nearest)
    return layers.interactive()
def graph_spinorama(dfu, graph_params):
    """Build the spinorama chart: pressure curves plus DI curves.

    The four pressure measurements ('On Axis', 'Listening Window',
    'Early Reflections', 'Sound Power') share one y axis; the two
    directivity-index measurements ('Early Reflections DI',
    'Sound Power DI') are drawn on an independent y axis. Circles that
    appear on hover carry the tooltips.

    Args:
        dfu: data passed to ``alt.Chart``; the encodings read the fields
            ``Freq``, ``dB`` and ``Measurements``.
        graph_params: mapping providing ``xmin``, ``xmax``, ``ymin``,
            ``ymax``, ``width`` and ``height``.

    Returns:
        The layered Altair chart.

    Raises:
        KeyError: if one of the keys above is missing from
            ``graph_params``.

    Note:
        A degenerate range (``xmin == xmax`` or ``ymin == ymax``) is only
        logged as an error; the chart is still built. ``nearest`` is not
        defined in this function and is expected to exist at module level.
    """
    xmin = graph_params['xmin']
    xmax = graph_params['xmax']
    ymin = graph_params['ymin']
    ymax = graph_params['ymax']
    if xmax == xmin:
        logging.error('Graph configuration is incorrect: xmin==xmax')
    if ymax == ymin:
        logging.error('Graph configuration is incorrect: ymin==ymax')

    # add selectors
    selectorsMeasurements = alt.selection_multi(fields=['Measurements'],
                                                bind='legend')
    scales = alt.selection_interval(bind='scales')

    # main charts
    xaxis = alt.X('Freq:Q',
                  title='Frequency (Hz)',
                  scale=alt.Scale(type='log',
                                  base=10,
                                  nice=False,
                                  domain=[xmin, xmax]),
                  axis=alt.Axis(format='s'))
    yaxis = alt.Y('dB:Q',
                  title='Sound Pressure (dB)',
                  scale=alt.Scale(zero=False, domain=[ymin, ymax]))
    # The DI axis spans as many dB as the main axis but starts at -5.
    # NOTE(review): the original code does not explain the -5 offset.
    di_yaxis = alt.Y('dB:Q',
                     title='Sound Pressure DI (dB)',
                     scale=alt.Scale(zero=False,
                                     domain=[-5, ymax - ymin - 5]))
    color = alt.Color('Measurements', type='nominal', sort=None)
    # Curves not picked in the legend are dimmed.
    opacity = alt.condition(selectorsMeasurements, alt.value(1),
                            alt.value(0.2))
    # Hover markers are only visible for the point matched by `nearest`.
    hover_opacity = alt.condition(nearest, alt.value(1), alt.value(0))
    tooltip = ['Measurements', 'Freq', 'dB']

    # Each predicate is shared by a line chart and its hover circles.
    spl_filter = alt.FieldOneOfPredicate(field='Measurements',
                                         oneOf=[
                                             'On Axis', 'Listening Window',
                                             'Early Reflections',
                                             'Sound Power'
                                         ])
    di_filter = alt.FieldOneOfPredicate(
        field='Measurements',
        oneOf=['Early Reflections DI', 'Sound Power DI'])

    line = alt.Chart(dfu).mark_line().transform_filter(spl_filter).encode(
        x=xaxis, y=yaxis, color=color, opacity=opacity)
    circle = alt.Chart(dfu).mark_circle(size=100).transform_filter(
        spl_filter).encode(x=xaxis,
                           y=yaxis,
                           color=color,
                           opacity=hover_opacity,
                           tooltip=tooltip)

    di = alt.Chart(dfu).mark_line().transform_filter(di_filter).encode(
        x=xaxis, y=di_yaxis, color=color, opacity=opacity)
    circle_di = alt.Chart(dfu).mark_circle(size=100).transform_filter(
        di_filter).encode(x=xaxis,
                          y=di_yaxis,
                          color=color,
                          opacity=hover_opacity,
                          tooltip=tooltip)

    # assemble elements together
    spin = alt.layer(circle + line, circle_di + di).resolve_scale(
        y='independent').add_selection(selectorsMeasurements).add_selection(
            scales).add_selection(nearest).properties(
                width=graph_params['width'], height=graph_params['height'])
    return spin
def stack_bar_chart():
    """Render the crash-cause sidebar controls and return a bar chart.

    Draws a sidebar title, two sidebar select boxes (crash type and year),
    a hint text and a "View All" button, then builds one chart:

    * "View All" pressed: records per year, stacked and coloured by cause,
      restricted to the listed crash types.
    * otherwise, when a year is selected: records per month for the
      selected crash type and year.
    * otherwise, when the selected crash type is one of the listed types:
      records per year for that crash type.

    The previous implementation built the per-year chart and then
    overwrote it with the per-month chart whenever a year was selected;
    only the chart that is actually returned is built now.

    Returns:
        The Altair chart for the current widget state.

    Raises:
        ValueError: if no year is selected and the selected crash type is
            not one of the listed types (previously an UnboundLocalError).
    """
    rl_vio = doc(0)
    # NOTE(review): the comparison below presumably expects a numeric YEAR
    # column; an `astype("int")` conversion was commented out here -- confirm.
    source = rl_vio[rl_vio["YEAR"] > 2014]
    crash_type = [
        "FAILING TO REDUCE SPEED TO AVOID CRASH",
        "FAILING TO YIELD RIGHT-OF-WAY",
        "FOLLOWING TOO CLOSELY",
        "IMPROPER LANE USAGE",
        "IMPROPER OVERTAKING/PASSING",
    ]

    st.sidebar.title("What causes the accidents?")
    select1 = st.sidebar.selectbox("Choose the crash type: ", crash_type)
    select2 = st.sidebar.selectbox("Choose the year: ",
                                   [2015, 2016, 2017, 2018, 2019, 2020, 2021])
    st.text("Go back to see all types of causes? Click 'View All'!")

    if st.button("View All"):
        return alt.Chart(source).mark_bar(size=20).encode(
            alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS)"]),
            alt.Y('YEAR:O',
                  title="Year",
                  axis=alt.Axis(grid=False, labelAngle=0)),
            alt.X('sum(RECORDS)',
                  axis=alt.Axis(grid=False, labelAngle=0),
                  title="Records"),
            color="CAUSE",
            order=alt.Order(
                # Sort the segments of the bars by this field
                'CAUSE',
                sort='ascending')).properties(
                    height=400, width=850).transform_filter(
                        alt.FieldOneOfPredicate(
                            field='CAUSE', oneOf=crash_type)).interactive()

    # Both single-cause charts use the same x channel definition.
    records_x = alt.X('sum(RECORDS)',
                      axis=alt.Axis(grid=False, labelAngle=0, tickMinStep=1),
                      title="Records")

    # A selected year takes precedence: monthly breakdown for that year.
    if select2:
        return alt.Chart(source).mark_bar(size=20).encode(
            alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS)"]),
            alt.Y('MONTH:O',
                  axis=alt.Axis(grid=False, labelAngle=0),
                  title="Month"),
            records_x,
            color=alt.value("darkgray")).properties(
                height=400, width=600).transform_filter(
                    alt.datum.CAUSE == select1).transform_filter(
                        alt.datum.YEAR == select2)

    # No year selected: yearly totals for the selected crash type.
    if select1 in crash_type:
        return alt.Chart(source).mark_bar(size=20).encode(
            alt.Tooltip(["CAUSE:N", "sum(RECORDS)"]),
            alt.Y('YEAR:O',
                  axis=alt.Axis(grid=False, labelAngle=0),
                  title="Year"),
            records_x,
            color=alt.value("#e7ba52")).properties(
                height=400,
                width=600).transform_filter(alt.datum.CAUSE == select1)

    raise ValueError(
        "stack_bar_chart: no chart can be built for the current selection")
scale=alt.Scale(domain=[1896, 2016]), axis=alt.Axis(title=" ", labels=False, values=year_unique)), alt.Y(field='Medal', type="quantitative", stack='zero', scale=alt.Scale(domain=[0, 200]), axis=alt.Axis(labelFontSize=15, titleFontSize=25)), alt.Color('Team:N', scale=alt.Scale(scheme='category10'), legend=alt.Legend(values=Germany_name, title="Germany", labelFontSize=15, symbolSize=200, titleFontSize=20)), opacity=alt.value(0.8)).transform_filter( alt.FieldOneOfPredicate(field='NOC', oneOf=Germany)).properties( title='Stream graph for 4 groups of countries', width=1000, height=300).interactive() Olympic_Germany # plot for Soviet Union Olympic_Soviet = alt.Chart(olympic_BC_countYear_filled).mark_area().encode( alt.X(field='Year', type="quantitative", scale=alt.Scale(domain=[1896, 2016]), axis=alt.Axis(title=" ", labels=False, values=year_unique)), alt.Y(field='Medal', type="quantitative", stack='zero', scale=alt.Scale(domain=[0, 200]),
y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log')), color=alt.Color('Country:N', scale=alt.Scale(domain=color_domain, range=color_range), legend=None), tooltip=['Country', 'Date', 'Confirmed Cases', 'Days since 100 cases']) country_selection = alt.selection_single( name='Select', fields=['Country'], bind=alt.binding_select( options=list(sorted(set(countries) - set(baseline_countries)))), init={'Country': 'US'}) date_filter = alt.datum['Date'] >= int(max_date.timestamp() * 1000) base2 = base.transform_filter( alt.FieldOneOfPredicate('Country', baseline_countries)) base3 = base.transform_filter(country_selection) base4 = base3.transform_filter(date_filter) max_day = dff2['Days since 100 cases'].max() ref = pd.DataFrame([[x, 100 * 1.33**x] for x in range(max_day + 1)], columns=['Days since 100 cases', 'Confirmed Cases']) base_ref = alt.Chart(ref).encode(x='Days since 100 cases:Q', y='Confirmed Cases:Q') base_ref_f = base_ref.transform_filter( alt.datum['Days since 100 cases'] >= max_day) chart5 = ( base_ref.mark_line(color='black', opacity=.5, strokeDash=[3, 3]) + base_ref_f.mark_text( dy=-6, align='right', fontSize=10, text='33% Daily Growth') +
data.movies.url, format=alt.DataFormat(parse={"Release_Date":"date"}) ) ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R'] genres = ['Action', 'Adventure', 'Black Comedy', 'Comedy', 'Concert/Performance', 'Documentary', 'Drama', 'Horror', 'Musical', 'Romantic Comedy', 'Thriller/Suspense', 'Western'] base = alt.Chart(movies, width=200, height=200).mark_point(filled=True).transform_calculate( Rounded_IMDB_Rating = "floor(datum.IMDB_Rating)", Hundred_Million_Production = "datum.Production_Budget > 100000000.0 ? 100 : 10", Release_Year = "year(datum.Release_Date)" ).transform_filter( alt.datum.IMDB_Rating > 0 ).transform_filter( alt.FieldOneOfPredicate(field='MPAA_Rating', oneOf=ratings) ).encode( x=alt.X('Worldwide_Gross:Q', scale=alt.Scale(domain=(100000,10**9), clamp=True)), y='IMDB_Rating:Q', tooltip="Title:N" ) # A slider filter year_slider = alt.binding_range(min=1969, max=2018, step=1) slider_selection = alt.selection_single(bind=year_slider, fields=['Release_Year'], name="Release Year_") filter_year = base.add_selection( slider_selection ).transform_filter( slider_selection