Пример #1
0
# Scatter plot of Salary against AmountSpent, coloured by Age.
# Rows are restricted to Salary < 120000 with an explicit field predicate.
(
    alt.Chart(marketing)
    .mark_circle(size=50)
    .encode(x='Salary', y='AmountSpent', color='Age')
    .transform_filter(alt.FieldLTPredicate(field='Salary', lt=120000))
    .properties(height=400, width=500)
)
"""    The same filtering operation can also be done by using the datum module of Altair. It is simpler in terms of the syntax. The following code will create the same plot as above."""

from altair import datum
# Scatter plot of Salary against AmountSpent, coloured by Age.
# The Salary < 120000 restriction is written as a `datum` expression.
(
    alt.Chart(marketing)
    .mark_circle(size=50)
    .encode(x='Salary', y='AmountSpent', color='Age')
    .transform_filter(datum.Salary < 120000)
    .properties(height=400, width=500)
)
"""    Specify a condition for filtering based on a categorical column. For instance, the data points that belong to a set of discrete values can be filtered using the FieldOneOfPredicate method."""

# Scatter plot of Salary against AmountSpent, coloured by Age.
# Only rows whose 'Children' value is 0, 2 or 3 are kept.
(
    alt.Chart(marketing)
    .mark_circle(size=50)
    .encode(x='Salary', y='AmountSpent', color='Age')
    .transform_filter(
        alt.FieldOneOfPredicate(field='Children', oneOf=[0, 2, 3])
    )
    .properties(height=400, width=500)
)
"""Two plots:
        One will be a scatter plot that consists of the salary and amount spent columns. The other one will be a bar plot that shows the average salary for the categories in the age column. The second plot will also be used as a filter for the first plot.

The averages are calculated by applying the following transformation in the encode function:
`y='mean(Salary):Q'`
"""

# Multi-selection projected onto the 'Age' field.
selection = alt.selection_multi(fields=['Age'])

# Scatter plot of Salary against AmountSpent that only keeps the rows matching
# `selection`. No data is bound here: alt.Chart() is called without arguments.
first = (
    alt.Chart()
    .mark_circle(size=50)
    .encode(x='Salary', y='AmountSpent')
    .transform_filter(selection)
    .properties(height=300, width=500)
)
second = (alt.Chart().mark_bar().encode(
    x='Age:O',
# Create a choropleth map for San Antonio and the surrounding counties.
sa_chart = (
    alt.Chart(counties)
    .mark_geoshape(stroke='grey')
    .encode(
        # Shapes matching `selection` are drawn black; every other shape is
        # coloured by its cases-per-100K value.
        color=alt.condition(selection, alt.value('black'), 'cases-per-100K:Q'),
        tooltip=[
            '[properties][NAME]:N',
            'cases:Q',
            'population:Q',
            'cases-per-100K:Q',
        ],
    )
    # Join the case figures recorded for `date_value` onto each shape,
    # matching the shape's NAME property against the 'county' column.
    .transform_lookup(
        lookup='[properties][NAME]',
        from_=alt.LookupData(
            cumulative_cases[cumulative_cases['date'] == date_value],
            'county',
            ['cases', 'population', 'cases-per-100K'],
        ),
    )
    .properties(width=800, height=600)
    .add_selection(selection)
    # Keep only the eight listed counties.
    .transform_filter(
        alt.FieldOneOfPredicate(
            field='[properties][NAME]',
            oneOf=[
                'Bexar', 'Medina', 'Bandera',
                'Kendall', 'Comal',
                'Guadalupe', 'Wilson',
                'Atascosa',
            ],
        )
    )
)

#create line chart for San Antonio and surrounding counties
sa_line = alt.Chart(cumulative_cases).mark_line().encode(
    x='date',
    y='cases-per-100K:Q',
    color='county',
    tooltip=['date', 'county', 'cases', 'population',
             'cases-per-100K']).transform_filter(
                 alt.FieldOneOfPredicate(field='county',
                                         oneOf=[
                                             'Bexar', 'Medina', 'Bandera',
                                             'Kendall', 'Comal', 'Guadalupe',
# %%
# Heatmap of summed minutes: one cell per (day of week, week number).
# Only rows whose 'type' is listed in `growth_types` are kept.
heatmap_weekday_growth = (
    alt.Chart(heatmap_wrangled_filtered_grouped)
    .mark_rect()
    .encode(
        x=alt.X("weekday:O", title="Day of Week"),
        y=alt.Y("pd_week_number:O", title="Week #"),
        color=alt.Color(
            "sum(minutes):Q",
            scale=alt.Scale(scheme="warmgreys"),
            legend=None,
        ),
        tooltip=[
            alt.Tooltip("monthdate(date):T", title="Date"),
            alt.Tooltip("sum(minutes):Q", title="Minutes"),
        ],
    )
    # A weekday-only filter (weekday in 1..5) used to be applied at this
    # point; it is currently disabled.
    .transform_filter(
        alt.FieldOneOfPredicate(field="type", oneOf=growth_types)
    )
)

# heatmap_weekend_growth = alt.Chart(heatmap_wrangled_filtered_grouped).mark_rect().encode(
#    x= alt.X("weekday:O", title=None),
#    y= alt.Y("pd_week_number:O", axis=None),
#    color= alt.Color('sum(minutes):Q',scale=alt.Scale(scheme="warmgreys"), legend=None),
#    tooltip=[
#        alt.Tooltip('monthdate(date):T', title='Date'),
#        alt.Tooltip('sum(minutes):Q', title='Minutes')
#    ]
# ).transform_filter(
#    alt.FieldOneOfPredicate(field='weekday', oneOf=[6,7])
# ).transform_filter(
#    alt.FieldOneOfPredicate(field='type', oneOf = growth_types)
# )
Пример #4
0
                   ])

# Keep the 1990 and 2010 rows whose 'Unit Code' is 'T_CO2_EQVT' and whose
# PowerCode is 'Thousands'. The explicit copy makes the frame independent of
# `data`, so the replacements below never operate on a slice of it.
chart_data = data.loc[
    data.Year.isin([1990, 2010])
    & (data['Unit Code'] == 'T_CO2_EQVT')
    & (data.PowerCode == 'Thousands')
].copy()

# Altair does not allow custom string labels for grouped bar charts, so the
# sector codes are replaced by their human-readable descriptions in the data.
descriptions = ['Energy', 'Manufacturing', 'Transport', 'Residential', 'Other']
fields = ['ENER_IND', 'ENER_MANUF', 'ENER_TRANS', 'ENER_OSECT', 'ENER_OTH']
for field, description in zip(fields, descriptions):
    # regex=True also rewrites the code when it is a substring of a value.
    chart_data = chart_data.replace(field, description, regex=True)

chart = alt.Chart(
    chart_data,
    title="Greenhouse gas emissions by sector in 1990 and 2010").mark_bar(
    ).transform_filter(alt.FieldOneOfPredicate(
        field='VAR', oneOf=descriptions)).transform_calculate(
            mill_value='datum.Value/1000', ).encode(
                alt.Y('mill_value:Q',
                      axis=alt.Axis(tickCount=5,
                                    domainWidth=0,
                                    title='CO2 equivalent (megatonnes)',
                                    titleFontWeight='lighter',
                                    titlePadding=10)),
                alt.Color('Year:O',
                          legend=alt.Legend(title='Years',
                                            titleFontSize=12,
                                            labelFontSize=12,
                                            titlePadding=10,
                                            titleFontWeight='lighter')),
                alt.X('Year:O', axis=None),
                alt.Column('VAR:N',
Пример #5
0
# MPAA ratings kept by the base chart's filter.
ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R']
genres = [
    'Action', 'Adventure', 'Black Comedy', 'Comedy', 'Concert/Performance',
    'Documentary', 'Drama', 'Horror', 'Musical', 'Romantic Comedy',
    'Thriller/Suspense', 'Western',
]

# Base scatter plot of worldwide gross against IMDB rating.
base = (
    alt.Chart(movies, width=200, height=200)
    .mark_point(filled=True)
    # Derived fields: floored rating, a 100/10 flag for budgets above 100
    # million, and the release year extracted from the release date.
    .transform_calculate(
        Rounded_IMDB_Rating="floor(datum.IMDB_Rating)",
        Hundred_Million_Production=(
            "datum.Production_Budget > 100000000.0 ? 100 : 10"
        ),
        Release_Year="year(datum.Release_Date)",
    )
    # Drop rows without a positive rating, then keep the listed MPAA ratings.
    .transform_filter(alt.datum.IMDB_Rating > 0)
    .transform_filter(
        alt.FieldOneOfPredicate(field='MPAA_Rating', oneOf=ratings)
    )
    .encode(
        x=alt.X(
            'Worldwide_Gross:Q',
            scale=alt.Scale(domain=(100000, 10**9), clamp=True),
        ),
        y='IMDB_Rating:Q',
        tooltip="Title:N",
    )
)

# A slider filter: a range input (1969-2018, step 1) bound to a single
# selection on the derived Release_Year field.
year_slider = alt.binding_range(min=1969, max=2018, step=1)
slider_selection = alt.selection_single(
    bind=year_slider,
    fields=['Release_Year'],
    name="Release Year_",
)

# Base chart restricted to the year picked with the slider.
filter_year = (
    base.add_selection(slider_selection)
    .transform_filter(slider_selection)
    .properties(title="Slider Filtering")
)
Пример #6
0
def error_span_durations(traceID):
    """Build stacked duration histograms for the errored spans of one trace.

    For every span of the requested trace whose ``error`` value equals
    ``True``, one layered chart is produced from all loaded spans sharing
    that span's ``name``:

    * a histogram of ``duration`` coloured by ``error``, and
    * an overlay of the errored spans belonging to ``traceID``.

    Each chart is titled with the percentage of same-named spans whose
    duration is strictly greater than the errored span's duration.

    Args:
        traceID: Value compared against the ``traceId`` column of the frame
            returned by ``load_traces()``.

    Returns:
        str: JSON of the vertical concatenation of all per-span charts.

    Raises:
        KeyError: If no span with the given trace id exists.
    """
    traces = load_traces()
    # Missing error flags are replaced by the *string* "false".
    traces['error'] = traces['error'].fillna("false")

    trace_spans = (
        traces.loc[traces['traceId'] == traceID]
        .sort_values(by=['traceId', 'timestamp'], ascending=True)
        .reset_index()
    )
    if trace_spans.empty:
        # Same exception type as the positional lookup that used to fail
        # here, but with a message that names the actual problem.
        raise KeyError(f"no spans found for trace id {traceID!r}")

    # The comparison with True is deliberate: the filler value "false" is a
    # truthy string, so plain truthiness would select the wrong rows.
    errored_spans = trace_spans.loc[trace_spans['error'] == True]  # noqa: E712

    # Names of the spans that carry an error; used to filter the histogram.
    error_span_names = errored_spans['name'].tolist()

    # Histogram over all data points of the layered chart's data.
    hist = alt.Chart().mark_bar().encode(
        y=alt.Y('count()', axis=alt.Axis()),
        x=alt.X('duration', axis=alt.Axis(title='Span Duration')),
        tooltip=['duration', 'error'],
        color=alt.Color(
            'error',
            title="Has Error",
            scale=alt.Scale(range=['#D62728', '#1F77B4']),
        ),
    ).transform_filter(
        alt.FieldOneOfPredicate(field='name', oneOf=error_span_names))

    # Overlay restricted to the errored spans of the requested trace.
    error_hist = alt.Chart().mark_bar(color='red').encode(
        y=alt.Y('count()',
                axis=alt.Axis(title='Count of Spans',
                              format=".0f",
                              tickMinStep=1)),
        x=alt.X('duration', axis=alt.Axis(title='Span Duration')),
        tooltip=['duration', 'error'],
        color=alt.Color('traceId', scale=alt.Scale(range=['orange'])),
    ).interactive().transform_filter(
        (alt.datum.error == True)  # noqa: E712 - builds a Vega expression
    ).transform_filter(
        (alt.datum.traceId == traceID))

    charts = []
    for name, duration in zip(errored_spans['name'],
                              errored_spans['duration']):
        subset = traces.loc[traces['name'] == name]
        # kind='weak' gives the share of durations <= `duration`, so the
        # complement is the share that is strictly greater.
        slower_share = 100 - stats.percentileofscore(
            subset['duration'], duration, kind='weak')
        percentage = f"{slower_share:.1f}"

        summary = f"{percentage}% of all {name} span durations are greater than the {name} span that errored in Trace ID: {traceID}."

        charts.append(
            alt.layer(hist, error_hist, data=subset)
            .properties(title=summary)
            .resolve_scale(color='independent'))

    return alt.vconcat(*charts).to_json()
Пример #7
0
    color=alt.Color('variable_posneg', title='Impacto'),
    tooltip=[
        alt.Tooltip('mean(Mean_conditions):Q', title='Media', format=',.2f')
    ]).transform_filter(
        (datum.variable_category == 'Economía no básica')).transform_filter(
            alt.FieldOneOfPredicate(
                field='variable',
                oneOf=[
                    'Vacaciones No', 'Vacaciones_Sí', 'Alimentación No',
                    'Alimentación Sí', '"Colchón" económico Sí',
                    '"Colchón" económico No', 'Alto impacto coste vivienda',
                    'Bajo impacto coste vivienda',
                    'Impacto medio coste vivienda', 'Calefacción No',
                    'Calefacción Sí', 'Riesgo de pobreza No',
                    'Riesgo de pobreza Sí', 'Carencia Material Severa No',
                    'Carencia material severa', 'Enfermedades Crónicas No',
                    'Enfermedades Crónicas Sí', 'Limitaciones físicas No',
                    'Limitaciones físicas leves Sí',
                    'Limitaciones físicas graves Sí', 'Compra de ropa No',
                    'Compra de ropa Sí', 'Compra de zapatos No',
                    'Compra de zapatos Sí', 'Ocio con amigos No',
                    'Ocio con amigos Sí', 'Ocio en general No',
                    'Ocio en general Sí', 'Gasto en uno mismo No',
                    'Gasto en uno mismo Sí', 'Acceso a internet No',
                    'Acceso a internet Sí'
                ])).properties(height=300,
                               width=300,
                               title='Variables económicas hedonistas')

x2 = alt.Chart(df_vis_nac).mark_bar().encode(
    x=alt.X('variable:O', title=None),
Пример #8
0
        color=alt.Color('primary:N'),
        # column = alt.Column('date:T', timeUnit='year')
    )

    color = alt.Chart(df).mark_text(baseline='bottom').encode(
        x=alt.X('primary:N',
                sort=alt.Sort(field='count', op='mean', order='descending')),
        y=alt.Y('count:Q'),
        text='count:Q',
        # column = alt.Column('date:T', timeUnit='year')
    )

    chrts.append((chart + color).properties(
        width=280, title='Year ' + str(years[y])).transform_calculate(
            year='year(datum.date)').transform_filter(
                alt.FieldOneOfPredicate(field='year', oneOf=[int(
                    years[y])])).transform_aggregate(count='count()',
                                                     groupby=['primary']))

# Lay the collected per-year charts out side by side.
alt.hconcat(*chrts)

# Trail chart: one trail per 'primary' value along the year of 'date'.
# The y axis is sorted by descending count, and the trail thickness encodes
# the record count on a 1-25 size scale.
(
    base.mark_trail().encode(
        x=alt.X('date:T', timeUnit='year'),
        y=alt.Y(
            'primary:N',
            sort=alt.Sort(op='count', order='descending'),
        ),
        detail=alt.Detail('primary'),
        size=alt.Size('count()', scale=alt.Scale(range=[1, 25])),
    )
)
"""# Чернетка

# Task 1
## V4
Пример #9
0
def plot_time(query_string, highlight_country, year_range):
    """Create a time series plot of country-wise obesity rates.

    Draws one line per country (a "spaghetti" plot) from the data returned by
    ``he.make_rate_data`` and layers the highlighted countries on top with
    their own colour legend. Non-highlighted countries are drawn light gray
    at reduced opacity.

    Args:
        query_string (str): Passed through to ``he.make_rate_data``;
            presumably a pandas query used to filter the data.
        highlight_country (str | list[str]): Country name, or list of
            country names, to highlight.
        year_range (Sequence): Two-element range of years. It is only used
            to build the chart subtitle ("<first>-<second>"); this function
            does not filter the data by it.

    Returns:
        altair chart: The layered time series chart.
    """
    # Filter data
    ob_yr = he.make_rate_data(["country", "year"], ["obese"], query_string)

    # Create labels
    title_label = "World Obesity"
    sub_label = f"{year_range[0]}-{year_range[1]}"

    # Normalise a single country name to a one-element list.
    if isinstance(highlight_country, str):
        highlight_country = [highlight_country]

    # Data for the highlighted countries. The explicit copy avoids assigning
    # a new column onto a slice of `ob_yr`.
    highlighted_data = ob_yr[ob_yr["country"].isin(highlight_country)].copy()
    # Every remaining row belongs to a highlighted country, so the legend
    # label is simply the country name.
    highlighted_data["highlighted"] = highlighted_data["country"]

    # Encodings and predicate shared by both layers.
    year_axis = alt.X(
        "year:O",
        scale=alt.Scale(zero=False),
        title="Years",
        axis=alt.Axis(grid=False),
    )
    rate_axis = alt.Y(
        "obese:Q",
        title="Obesity Rate",
        axis=alt.Axis(format="%"),
    )
    is_highlighted = alt.Predicate(
        alt.FieldOneOfPredicate(field="country", oneOf=highlight_country))

    # Background layer: all countries, highlighted ones in colour.
    country_time_chart = (
        alt.Chart(
            ob_yr,
            title=alt.TitleParams(text=title_label, subtitle=sub_label),
        )
        .mark_line()
        .encode(
            x=year_axis,
            y=rate_axis,
            color=alt.condition(
                is_highlighted,
                "country",
                alt.value("lightgray"),
            ),
            opacity=alt.condition(
                is_highlighted,
                alt.value(1),
                alt.value(0.2),
            ),
            tooltip="country",
        )
        .properties(width=450, height=300)
        .interactive()
    )

    # Foreground layer: highlighted countries only, carrying the legend.
    highlighted_time_chart = (
        alt.Chart(highlighted_data)
        .mark_line()
        .encode(
            x=year_axis,
            y=rate_axis,
            color=alt.Color(
                "highlighted",
                legend=alt.Legend(title="Countries",
                                  values=highlight_country),
            ),
            tooltip="country",
        )
    )

    return country_time_chart + highlighted_time_chart
Пример #10
0
def generate_line_chart(
    years: list,
    width: int = 700,
    height: int = 500,
) -> alt.Chart:
    """Build an interactive price-over-time line chart with a year legend.

    Reads the module-level ``calendar_pd`` frame, keeps the first row per
    ``date`` with a non-null ``price`` below 5000, and draws one line per
    year listed in ``years``. Clicking a year in the legend (placed to the
    right of the chart) emphasises that year; hovering shows a guide rule,
    a point and a text label for the nearest x position.

    Args:
        years: Year values to plot. The first entry is the initially
            selected year.
        width: Width of the layered chart.
        height: Height of the layered chart.

    Returns:
        The layered chart concatenated horizontally with the legend chart.
    """
    # Filter data: one row per date, known price below 5000.
    prices = calendar_pd.drop_duplicates(subset=['date'], keep='first')
    prices = prices[(prices.price < 5000) & prices.price.notna()]

    # Legend click: selects whole years, starting with the first one.
    click = alt.selection_multi(
        fields=['year'],
        empty='none',  # an empty selection matches no data points
        init=[{"year": years[0]}],
    )

    # Hover: picks the data point nearest to the cursor along x.
    hover = alt.selection_single(
        encodings=['x'],
        on='mouseover',
        nearest=True,
        empty='none',  # an empty selection matches no data points
    )

    requested_years = alt.FieldOneOfPredicate(field='year', oneOf=years)

    # Clickable legend: one point per year, gray unless selected.
    legend_colour = alt.condition(
        click, alt.Color('year:N', legend=None), alt.value('lightgray'))
    legend = (
        alt.Chart(prices)
        .mark_point()
        .encode(
            y=alt.Y('year:N', axis=alt.Axis(orient='right')),
            color=legend_colour,
        )
        .add_selection(click)
        .transform_filter(requested_years)
    )

    # Base line chart of prices, one colour per year.
    base = (
        alt.Chart(prices)
        .mark_line()
        .encode(
            x=alt.X('partial_date:T'),
            y=alt.Y('price:Q'),
            color=alt.Color('year:N', legend=None),
        )
        .transform_filter(requested_years)
    )

    # Vega expression producing the hover label text.
    label_expr = '"price:$" + datum.price + " date:" + datum.clean_date'

    # Lines: selected years opaque, the others faded.
    lines = base.encode(
        opacity=alt.condition(click, alt.value(1), alt.value(0.1)))

    # Vertical guide line at the hovered x position.
    guide = (
        alt.Chart(prices)
        .mark_rule(color='#aaa')
        .encode(x='partial_date:T')
        .transform_filter(hover)
    )

    # Circle marks for the selected years; only the hovered one is visible.
    points = (
        base.mark_circle()
        .encode(opacity=alt.condition(hover, alt.value(1), alt.value(0)))
        .add_selection(hover)
        .transform_filter(click)
    )

    # White-stroked text underneath the label keeps it legible.
    label_background = (
        base.mark_text(
            align='left', dx=5, dy=-5, stroke='white', strokeWidth=2)
        .encode(text=alt.Text('label:N'))
        .transform_filter(hover)
        .transform_filter(click)
        .transform_calculate(label=label_expr)
    )

    # The label text itself.
    label_text = (
        base.mark_text(align='left', dx=5, dy=-5)
        .encode(text='label:N')
        .transform_filter(hover)
        .transform_filter(click)
        .transform_calculate(label=label_expr)
    )

    layered = (
        alt.layer(lines, guide, points, label_background, label_text)
        .properties(width=width, height=height, title="price over time")
        .interactive()
    )

    final_chart = layered | legend
    return final_chart
Пример #11
0
def plot_times_series_prediction(df_,
                                 prediciton_mean_,
                                 prediction_ci_=None,
                                 width=800,
                                 height=150):
    """Plot a time series with its prediction and an optional confidence band.

    Two vertically stacked charts are returned. The upper one shows the full
    series and carries an interval brush along x; the lower one shows points
    and tooltips and is filtered by that brush.

    Args:
        df_: DataFrame holding the time series (dates in the index).
        prediciton_mean_: Model output with the mean of the prediction
            (dates in the index).
        prediction_ci_: Optional confidence intervals with 'upper y' and
            'lower y' columns (dates in the index). When given, an error
            band is layered under both charts.
        width: Width of each chart.
        height: Height of each chart.

    Returns:
        The vertical concatenation of the overview and the detail chart.
    """
    has_ci = prediction_ci_ is not None

    # Build the frame to plot from copies, leaving the inputs untouched.
    # NOTE(review): if `prediciton_mean_` is a Series with a name other than
    # 'prediction', pandas may select by that label instead of renaming;
    # confirm against the caller.
    prediction_df = pd.DataFrame(prediciton_mean_.copy(),
                                 columns=['prediction'])
    if has_ci:
        prediction_df = pd.DataFrame(prediction_ci_.copy()).merge(
            prediction_df, left_index=True, right_index=True)

    # Long format: one row per (index, variable) pair with a known value.
    # Assumes the index is unnamed, so reset_index() yields an 'index' column.
    df_plot = (
        df_.copy()
        .merge(prediction_df, left_index=True, right_index=True, how='outer')
        .reset_index()
        .melt(id_vars=['index'])
        .dropna()
    )

    # Colour scheme and legend shared by both line charts.
    scheme = alt.Scale(scheme='tableau10')
    legend = alt.Legend(title='Leyenda',
                        symbolSize=30,
                        symbolType='circle',
                        symbolStrokeWidth=5,
                        orient="top",
                        direction="horizontal",
                        labelFont='Calibri',
                        labelFontSize=12,
                        titleFont='Calibri',
                        titleFontSize=15,
                        titleFontWeight=900)
    series_only = alt.FieldOneOfPredicate(field='variable',
                                          oneOf=['y', 'prediction'])

    # Brush (interval selection along x) used to filter the detail chart.
    brush = alt.selection(type="interval", encodings=["x"])

    # Main time series chart: the complete series, used for brushing.
    main_times_series_plot = alt.Chart(df_plot).mark_line(
        point=False, opacity=0.7).encode(
            x=alt.X('index:T'),
            y=alt.Y('value:Q', axis=alt.Axis(grid=True)),
            color=alt.Color('variable', scale=scheme, legend=legend),
        ).properties(
            width=width,
            height=height,
            title='Serie completa para el filtrado',
        ).transform_filter(series_only)

    # Second chart: same series with points and a tooltip.
    second_times_series_plot = alt.Chart(df_plot).mark_line(
        point=True, opacity=0.7).encode(
            x=alt.X('index:T'),
            y=alt.Y('value:Q', axis=alt.Axis(grid=True)),
            color=alt.Color('variable', scale=scheme, legend=legend),
            tooltip=['index:T', 'value:Q', 'variable'],
        ).properties(
            width=width,
            height=height,
            title='Serie ampliada',
        ).transform_filter(series_only)

    overview = main_times_series_plot.add_selection(brush)
    detail = second_times_series_plot

    # Confidence band, layered under both charts when intervals are given.
    if has_ci:
        band = alt.Chart(df_plot).mark_errorband(
            extent='ci', color='red', opacity=0.1).encode(
                x=alt.X('index:T'), y=alt.Y('value:Q')).transform_filter(
                    alt.FieldOneOfPredicate(field='variable',
                                            oneOf=['lower y', 'upper y']))
        overview = band + overview
        detail = band + detail

    return overview & detail.transform_filter(brush)
Пример #12
0
def app():

    ####### Datasets
    
    control_dataset = 'https://raw.githubusercontent.com/JulioCandela1993/VisualAnalytics/master/data/control_policy.csv'
    deaths_dataset = 'https://raw.githubusercontent.com/JulioCandela1993/VisualAnalytics/master/data/deaths.csv'
    
    
    ####### Dataframes
    
    control_df = pd.read_csv(control_dataset)
    
    
    ####### Dashboard
    
    st.title("Tobacco Control")
    
    
    
    
    st.markdown('''
    The following analysis is based on the evaluation made by World Health Organization (WHO) 
    to country policies against Tobacco. A score from 1 to 5 is assigned depending on the intensity 
    of a country to deal with Tobacco issues being 1 the worst and 5 the best
    ''')
    
    ####### Control Measures given by WHO
    
    control_metrics = ["Monitor",	
               "Protect from tobacco smoke",	
               "Offer help to quit tobacco use", 
               "Warn about the dangers of tobacco",
               "Enforce bans on tobacco advertising",
               "Raise taxes on tobacco",
               #"Anti-tobacco mass media campaigns"
    ]
    
    
    # Main Selector of Control Measures
    cols = st.selectbox('Select control measure: ', control_metrics)
    
    if cols in control_metrics:   
        metric_to_show_in_covid_Layer = cols +":Q"
        metric_name = cols
       
    years = ['2008', '2010', '2012', '2014', '2016', '2018']
    columns_year = [metric_name+" "+str(year) for year in years]
    columns = ["d" +str(year) for year in years]
        
        
    container_map = st.beta_container()
    
    
    ####### Map Visualization
    
    with container_map:
        
        st.header("How are countries controlling Tobacco consumption?")
        #st.header('"'A global view of the implementation of the policy """ around the world'"')
    
        st.markdown('''
        In the folling map, we can identify the intensity of a specific control policy for each country. 
        We can also see the evolution of these policies from 2008 to 2018
        ''')
        
        # Year Selector
        select_year_list = st.selectbox('Select year: ', years)#st.slider('Select year: ', 2008, 2018, 2008, step = 2)
        select_year = int(select_year_list)

        # Map Topology
        url_topojson = 'https://raw.githubusercontent.com/JulioCandela1993/VisualAnalytics/master/world-countries.json'
        data_topojson_remote = alt.topo_feature(url=url_topojson, feature='countries1')
        
        
        ### Map Chart
        
        map_geojson = alt.Chart(data_topojson_remote).mark_geoshape(
            stroke="black",
            strokeWidth=1,
            #fill='lightgray'
        ).encode(
            color=alt.Color(metric_to_show_in_covid_Layer),
        ).transform_lookup(
                lookup="properties.name",
                from_=alt.LookupData(control_dataset, "Country", [metric_name,"Year"])
        ).properties(
            width=700,
            height=500
        )
              
        choro = alt.Chart(data_topojson_remote, title = 'Implementation of the policy "' +metric_name+'" around the world').mark_geoshape(
            stroke='black'
        ).encode(
            color=alt.Color(metric_to_show_in_covid_Layer, 
                            scale=alt.Scale(range=['#ffe8dd','#ef4f4f']),
                            legend=None),
            tooltip=[
                alt.Tooltip("properties.name:O", title="Country"),
                alt.Tooltip(metric_to_show_in_covid_Layer, title=metric_name),
                alt.Tooltip("year:Q", title="Year"),
            ],
        ).transform_calculate(
            d2008 = "1",
            d2010 = "1",
            d2012 = "1",
            d2014 = "1",
            d2016 = "1",
            d2018 = "1"
        ).transform_fold(
            columns, as_=['year', 'metric']
        ).transform_calculate(
            yearQ = 'replace(datum.year,"d","")'
        ).transform_calculate(
            key_val = 'datum.properties.name + datum.yearQ'
        ).transform_lookup(
                lookup="key_val",
                from_=alt.LookupData(control_dataset, "ID", [metric_name,"Year"])
        ).transform_calculate(
            year='parseInt(datum.Year)',
        ).transform_filter(
            alt.FieldEqualPredicate(field='year', equal=select_year)
        )
    
    
        st.altair_chart(map_geojson + choro)
            
        ## Qualification array
        
        qualifications = pd.DataFrame.from_dict({
            "keys": [1,2,3,4,5],
            "category":["1.Very Bad", "2.Bad", "3.Medium", "4.Good", "5.Perfect"]
            })
        
        ## Legend Chart
        
        ##### Data Transformations

        legend_info = alt.Chart(control_dataset).transform_joinaggregate(
            num_countries='count(*)',
        ).transform_filter(
            alt.FieldEqualPredicate(field='Year', equal=select_year)
        ).transform_lookup(
                lookup=metric_name,
                from_=alt.LookupData(qualifications, "keys", ["category"])
        ).transform_aggregate(
            count='count()',
            groupby=[metric_name,"category"]
        ).transform_joinaggregate(
            total='sum(count)'  
        ).transform_calculate(
            pct='datum.count / datum.total'  
        )
        
        legend_bar = legend_info.mark_bar().encode(
                x=alt.X('pct:Q', stack="normalize", sort=alt.SortField(metric_to_show_in_covid_Layer), title = None, axis = None),                
                color=alt.Color(metric_to_show_in_covid_Layer,
                            scale=alt.Scale(range=['#ffe8dd','#ef4f4f'])
                            ,legend = None),
                tooltip=[
                    alt.Tooltip(metric_to_show_in_covid_Layer, title=metric_name)
                ],
        )
        
        legend_value = legend_info.mark_text(dx = -11, align='center', baseline='middle', color='black', fontWeight = "bold").encode(
            x=alt.X('pct:Q', sort=alt.SortField(metric_to_show_in_covid_Layer), stack='normalize', axis = None),
            #detail = metric_to_show_in_covid_Layer,
            color=alt.Color(metric_name +":O",
                            scale=alt.Scale(range=['#000000','#000000'])
                            ,legend = None),
            text=alt.Text('pct:Q',format='.0%')
        )
        
        legend_category = legend_info.mark_text(dx = 10, dy = 10, align='left', baseline='middle', color='black', angle = 90, fontWeight = "bold").encode(
            x=alt.X('pct:Q', sort=alt.SortField(metric_to_show_in_covid_Layer), stack='normalize', axis = None),
            #detail = metric_to_show_in_covid_Layer,
            color=alt.Color(metric_name +":O",
                            scale=alt.Scale(range=['#000000','#000000'])
                            ,legend = None),
            #text=alt.Text(metric_to_show_in_covid_Layer)
            text=alt.Text("category:N")
        )
            
        legend_chart = (legend_bar + legend_value + legend_category).properties(
            width=700,
            height=100,
            title = metric_name
        ).configure_title(align = "left"
        ).configure_view(
            strokeWidth=0
        )
        
        
        st.altair_chart(legend_chart)
        
    ##### Evolution of policy per selected countries    
        
    container_policycountry = st.beta_container()
    with container_policycountry:

        st.header("Evolution of the policy per country")

        st.markdown('''
        
        In addition, we can evaluate and compare the evolution of the selected policy among different countries of our interest:
        
        ''')

        # Country picker: one option per distinct value of control_df['Country'].
        country_options = (
            control_df.groupby('Country').count().reset_index()['Country'].tolist()
        )
        countries = st.multiselect(
            'Select countries to plot',
            country_options,
            default=['China', 'India', 'France'],
        )

        # Whitespace-only markdown block kept from the original layout.
        st.markdown('''
        
    
        ''')

        xscale_barchart = alt.Scale(domain=(0, 5))

        # Comparison chart: one small bar per (country, year) cell.
        chart_title = 'Evolution of Policy "' + metric_name + '" per selected countries'
        country_header = alt.Header(labelAngle=0, labelAlign="left", labelLimit=100)
        value_color = alt.Color(
            "value:Q",
            scale=alt.Scale(domain=[1, 4], range=['#ffe8dd', '#ef4f4f']),
            legend=None,
        )
        hover_fields = [
            alt.Tooltip("Country:N", title="Country"),
            alt.Tooltip(metric_to_show_in_covid_Layer, title=metric_name),
            alt.Tooltip("Year:O", title="Year"),
        ]

        # Data pipeline: fold the selected metric into a generic 'value' column,
        # then keep only the chosen countries and the 2008-2018 range.
        policy_rows = alt.Chart(
            control_dataset, width=90, height=20, title=chart_title
        ).transform_fold(
            [metric_name],
            as_=['Measurement_type', 'value'],
        ).transform_filter(
            alt.FieldOneOfPredicate(field="Country", oneOf=countries)
        ).transform_filter(
            {'field': 'Year', 'range': [2008, 2018]}
        )

        barchart_country = policy_rows.mark_bar(size=20).encode(
            alt.X('value:Q', scale=xscale_barchart, title="",
                  axis=alt.Axis(grid=False)),
            alt.Row('Country:N', title="", spacing=5, header=country_header),
            alt.Column("Year:O", title="", spacing=10),
            color=value_color,
            tooltip=hover_fields,
        ).configure_title(align="center", anchor="middle", dy=-10)

        st.altair_chart(barchart_country)

        # The legend chart built in the previous section is shown again here.
        st.altair_chart(legend_chart)
        
    
    ####### Scatterplot control policy vs deaths
    
    def render_latex(formula, fontsize=10, dpi=100):
        """Render a math formula as an image in the Streamlit page.

        The formula is wrapped in ``$...$`` and drawn as figure text; the
        figure is then resized to the text's bounding box (plus a small
        margin), saved as a JPEG into memory and handed to ``st.image``.

        Args:
            formula: Math expression without the surrounding ``$`` delimiters.
            fontsize: Font size forwarded to ``fig.text``.
            dpi: Resolution used for both the measuring pass and the final image.

        Raises:
            Whatever matplotlib raises while laying out or saving the text;
            the figure is closed before the exception propagates.
        """
        fig = plt.figure()
        try:
            text = fig.text(0, 0, '$%s$' % formula, fontsize=fontsize)

            # Throwaway save: forces a draw so the text extent can be measured.
            fig.savefig(BytesIO(), dpi=dpi)

            bbox = text.get_window_extent()
            # Extent in pixels -> inches, with 0.05in of padding.
            width, height = bbox.size / float(dpi) + 0.05
            fig.set_size_inches((width, height))

            # Offset the text by the bounding box's lower edge, expressed as a
            # fraction of the new figure height.
            dy = (bbox.ymin / float(dpi)) / height
            text.set_position((0, -dy))

            buffer = BytesIO()
            fig.savefig(buffer, dpi=dpi, format='jpg')
        finally:
            # Close the figure on every path: this function runs on each
            # Streamlit rerun, so a figure left open after a failed render
            # would accumulate in pyplot's figure registry.
            plt.close(fig)

        st.image(buffer)
    
    
    
    container_correlation = st.beta_container()
    with container_correlation:

        st.header("Are the policies having an impact in the deaths by Smoking?")

        st.markdown('''
        Countries have implemented different control policies against Tobacco which have been measured by WHO from 2008 until 2018. 
        During this period, some countries have strengthen their policies; however, we don't know the real impact of them.
        
        As a consequence, the following visualization measures the correlation of the change in control policies with 
        respect to the change in deaths by Smoking. The definitions of % of change are the following:
            
        ''')

        # The displayed definitions must match incr_ratio_metric / incr_ratio_deaths
        # computed below: ((value in 2016 / value in 2008) - 1) * 100.
        render_latex(
            r'\%\ change\ in\ ' + metric_name
            + r'\ =\ (\frac{' + metric_name + r'\ in\ 2016}{'
            + metric_name + r'\ in\ 2008}\ -\ 1)\ \times\ 100'
        )

        render_latex(
            r'\%\ change\ in\ Deaths\ by\ Smoking\ =\ '
            r'(\frac{Deaths\ by\ Smoking\ in\ 2016}{Deaths\ by\ Smoking\ in\ 2008}'
            r'\ -\ 1)\ \times\ 100'
        )

        st.markdown('''
        The user can also select brush the histograms in order to filter the points and 
        evaluate the slope of the regression in more detail (with groups that increased more or less in control policies, for example)
        
        
        ''')

        # Interval selection drawn on the histograms and used to filter the scatter.
        brush = alt.selection_interval()

        ## Data Transformations

        # 1. Join deaths onto the policy rows by ID.
        # 2. Coerce the looked-up values to numbers and alias the chosen metric.
        # 3. Split deaths/metric into 2008 and 2016 columns (0 for other years).
        # 4. Sum per country, leaving one row per country with both years.
        # 5. Derive the percentage change for deaths and for the metric.
        # Only the aggregated columns and "Country" survive step 4, so no other
        # per-row helper fields are computed before it.
        base_scatter = alt.Chart(control_dataset).transform_lookup(
            lookup="ID",
            from_=alt.LookupData(deaths_dataset, "ID", ["deaths", "Year"])
        ).transform_calculate(
            deaths='parseFloat(datum.deaths)',
            year='parseInt(datum.Year)',
            metric=alt.datum[metric_name]
        ).transform_calculate(
            deaths_2016='datum.year==2016?datum.deaths:0',
            deaths_2008='datum.year==2008?datum.deaths:0',
            metric_2016='datum.year==2016?datum.metric:0',
            metric_2008='datum.year==2008?datum.metric:0'
        ).transform_aggregate(
            deaths_2016='sum(deaths_2016)',
            metric_2016='sum(metric_2016)',
            deaths_2008='sum(deaths_2008)',
            metric_2008='sum(metric_2008)',
            groupby=["Country"]
        ).transform_calculate(
            incr_ratio_deaths='((datum.deaths_2016/datum.deaths_2008)-1)*100',
            incr_ratio_metric='((datum.metric_2016/datum.metric_2008)-1)*100',
        )

        # Fixed axis domains, shared between the scatter and the histogram bins.
        xscale = alt.Scale(domain=(-100, 300))
        yscale = alt.Scale(domain=(-100, 200))

        ## Scatterplot of changes in Policy and changes in deaths

        points_scatter = base_scatter.mark_point(size=50, stroke="#ef4f4f").encode(
            alt.X('incr_ratio_metric:Q', scale=xscale,
                  title='% change of efforts in ' + metric_name + ' from 2008 to 2016'),
            alt.Y('incr_ratio_deaths:Q', scale=yscale,
                  title='% change in deaths from 2008 to 2016'),
            tooltip=[
                alt.Tooltip("deaths_2016:Q", title="Deaths in 2016"),
                alt.Tooltip("deaths_2008:Q", title="Deaths in 2008"),
                alt.Tooltip("Country:N", title="Country"),
            ],
        ).properties(
            width=450,
            height=450
        ).transform_filter(brush)

        # Linear fit of deaths change (dependent) on policy change (independent),
        # computed after the brush filter so it follows the selection.
        regression_scatter = points_scatter.transform_regression(
            on='incr_ratio_metric', regression='incr_ratio_deaths', method='linear'
        ).mark_line(color='#19456b')

        scatter_final = (points_scatter + regression_scatter)

        # Histogram of changes in policy

        top_hist = base_scatter.mark_area(line=True, opacity=0.3).encode(
            alt.X("incr_ratio_metric:Q",
                  bin=alt.Bin(maxbins=30, extent=xscale.domain),
                  title=''
                  ),
            alt.Y('count()', title=''),
            color=alt.value("#ef4f4f")
        ).add_selection(
            brush
        ).properties(width=450, height=100, title="Distribution of % change in policy")

        # Histogram of changes in deaths

        right_hist = base_scatter.mark_area(line=True, opacity=0.3).encode(
            alt.Y('incr_ratio_deaths:Q',
                  bin=alt.Bin(maxbins=20, extent=yscale.domain),
                  title='',
                  ),
            alt.X('count()', title=''),
            color=alt.value("#ef4f4f")
        ).add_selection(
            brush
        ).properties(
            width=110, height=450,
            title=alt.TitleParams(text="Distribution of % change in deaths",
                                  align="center", angle=90, orient='right')
        )

        # Layout: policy histogram on top, scatter with deaths histogram to its right.
        st.altair_chart(
            (top_hist & (scatter_final | right_hist)).properties(
                title="Correlation between % change in policy and % change in deaths"
            ).configure_title(align="center", anchor="middle", dy=-10)
        )
Пример #13
0
def graph_compare_cea2034(df, graph_params, speaker1, speaker2):
    """Build a layered chart comparing the spin curves of two speakers.

    Two layer groups are built from ``df``: the four spin curves
    ('On Axis', 'Listening Window', 'Early Reflections', 'Sound Power') and the
    two directivity-index curves, each on its own y scale. The group is
    rendered once with solid lines and once with dashed lines; each rendering
    is filtered by one of the two selections returned by ``build_selections``.

    Args:
        df: Data with at least 'Freq', 'dB' and 'Measurements' columns.
        graph_params: Currently unused; axis ranges and sizes are hard-coded.
        speaker1: Forwarded to ``build_selections``.
        speaker2: Forwarded to ``build_selections``.

    Returns:
        The interactive layered Altair chart.

    Note:
        ``nearest`` is not defined here; it is expected to be a selection
        available from the enclosing module scope.
    """
    selection1, selection2, selectorsMeasurements, scales = build_selections(
        df, speaker1, speaker2)

    # TODO(move to parameters)
    x_axis = alt.X('Freq:Q',
                   scale=alt.Scale(type="log", domain=[20, 20000], nice=False))
    y_axis = alt.Y('dB:Q', scale=alt.Scale(zero=False, domain=[-40, 10]))
    di_axis = alt.Y('dB:Q',
                    scale=alt.Scale(zero=False, domain=[-10, 40], nice=False))
    color = alt.Color('Measurements', type='nominal', sort=None)
    opacity = alt.condition(selectorsMeasurements, alt.value(1),
                            alt.value(0.2))

    # Hover markers are visible only for the point picked by `nearest`.
    hover_opacity = alt.condition(nearest, alt.value(1), alt.value(0))
    hover_tooltip = ['Measurements', 'Freq', 'dB']

    spin_curves = alt.FieldOneOfPredicate(
        field='Measurements',
        oneOf=['On Axis', 'Listening Window',
               'Early Reflections', 'Sound Power'])
    di_curves = alt.FieldOneOfPredicate(
        field='Measurements',
        oneOf=['Early Reflections DI', 'Sound Power DI'])

    # Mark-less bases; the concrete mark is chosen per layer below.
    line = alt.Chart(df).transform_filter(spin_curves).encode(
        x=x_axis, y=y_axis, color=color, opacity=opacity)
    points = line.mark_circle(size=100).encode(
        opacity=hover_opacity, tooltip=hover_tooltip)

    di = alt.Chart(df).transform_filter(di_curves).encode(
        x=x_axis, y=di_axis, color=color, opacity=opacity)
    points_di = di.mark_circle(size=100).encode(
        opacity=hover_opacity, tooltip=hover_tooltip)

    spin_full = alt.layer(
        points + line.mark_line(),
        points_di + di.mark_line(clip=True),
    ).resolve_scale(y='independent').properties(width=600, height=300)

    spin_dash = alt.layer(
        points + line.mark_line(strokeDash=[4, 2]),
        points_di + di.mark_line(clip=True, strokeDash=[4, 2]),
    ).resolve_scale(y='independent').properties(width=600, height=300)

    # Solid rendering follows selection1, dashed rendering follows selection2.
    line1 = spin_full.add_selection(selection1).transform_filter(selection1)
    line2 = spin_dash.add_selection(selection2).transform_filter(selection2)

    # Vertical guide at the hovered frequency.
    rules = alt.Chart(df).mark_rule(color='gray').encode(
        x='Freq:Q').transform_filter(nearest)

    layers = alt.layer(line2, line1, rules).add_selection(
        selectorsMeasurements).add_selection(scales).add_selection(
            nearest).interactive()
    return layers
Пример #14
0
def graph_spinorama(dfu, graph_params):
    """Build the spin chart (spin curves plus DI curves) for one speaker.

    Args:
        dfu: Data with 'Freq', 'dB' and 'Measurements' columns.
        graph_params: Mapping providing 'xmin', 'xmax', 'ymin', 'ymax',
            'width' and 'height'.

    Returns:
        A layered Altair chart with independent y scales for the sound
        pressure curves and the directivity-index curves.

    Note:
        A degenerate axis range (min == max) is only logged; the chart is
        still built. ``nearest`` is taken from the enclosing module scope.
    """
    xmin, xmax = graph_params['xmin'], graph_params['xmax']
    ymin, ymax = graph_params['ymin'], graph_params['ymax']
    if xmax == xmin:
        logging.error('Graph configuration is incorrect: xmin==xmax')
    if ymax == ymin:
        logging.error('Graph configuration is incorrect: ymin==ymax')

    # Selections: legend-bound curve toggle and pan/zoom bound to the scales.
    selectorsMeasurements = alt.selection_multi(fields=['Measurements'],
                                                bind='legend')
    scales = alt.selection_interval(bind='scales')

    # Axes. NOTE(review): the x title reads 'Freqency' in the rendered chart.
    freq_scale = alt.Scale(type='log', base=10, nice=False,
                           domain=[xmin, xmax])
    xaxis = alt.X('Freq:Q',
                  title='Freqency (Hz)',
                  scale=freq_scale,
                  axis=alt.Axis(format='s'))
    yaxis = alt.Y('dB:Q',
                  title='Sound Pressure (dB)',
                  scale=alt.Scale(zero=False, domain=[ymin, ymax]))
    # The DI axis starts at -5 dB and spans the same range as the SPL axis.
    di_yaxis = alt.Y('dB:Q',
                     title='Sound Pressure DI (dB)',
                     scale=alt.Scale(zero=False, domain=[-5, ymax - ymin - 5]))

    color = alt.Color('Measurements', type='nominal', sort=None)
    opacity = alt.condition(selectorsMeasurements, alt.value(1),
                            alt.value(0.2))
    # Hover markers show only at the point picked by `nearest`.
    hover_opacity = alt.condition(nearest, alt.value(1), alt.value(0))
    hover_tooltip = ['Measurements', 'Freq', 'dB']

    # Which measurements go on which y axis.
    spin_curves = alt.FieldOneOfPredicate(
        field='Measurements',
        oneOf=['On Axis', 'Listening Window',
               'Early Reflections', 'Sound Power'])
    di_curves = alt.FieldOneOfPredicate(
        field='Measurements',
        oneOf=['Early Reflections DI', 'Sound Power DI'])

    # Sound pressure layers.
    line = alt.Chart(dfu).mark_line().transform_filter(spin_curves).encode(
        x=xaxis, y=yaxis, color=color, opacity=opacity)
    circle = alt.Chart(dfu).mark_circle(size=100).transform_filter(
        spin_curves).encode(x=xaxis, y=yaxis, color=color,
                            opacity=hover_opacity, tooltip=hover_tooltip)

    # Directivity-index layers.
    di = alt.Chart(dfu).mark_line().transform_filter(di_curves).encode(
        x=xaxis, y=di_yaxis, color=color, opacity=opacity)
    circle_di = alt.Chart(dfu).mark_circle(size=100).transform_filter(
        di_curves).encode(x=xaxis, y=di_yaxis, color=color,
                          opacity=hover_opacity, tooltip=hover_tooltip)

    # Assemble the layers, attach the selections, then size the chart.
    layered = alt.layer(circle + line, circle_di + di)
    layered = layered.resolve_scale(y='independent')
    for selector in (selectorsMeasurements, scales, nearest):
        layered = layered.add_selection(selector)
    spin = layered.properties(width=graph_params['width'],
                              height=graph_params['height'])

    return spin
Пример #15
0
def stack_bar_chart():
    """Draw the crash-cause sidebar controls and return the matching bar chart.

    Rows after 2014 from ``doc(0)`` are charted. Pressing "View All" yields a
    stacked per-year chart over the five listed causes; otherwise the chart is
    restricted to the cause chosen in the sidebar.

    Returns:
        The Altair chart for the current widget state.
    """
    rl_vio = doc(0)
    source = rl_vio[rl_vio["YEAR"] > 2014]
    crash_type = [
        "FAILING TO REDUCE SPEED TO AVOID CRASH",
        "FAILING TO YIELD RIGHT-OF-WAY",
        "FOLLOWING TOO CLOSELY",
        "IMPROPER LANE USAGE",
        "IMPROPER OVERTAKING/PASSING",
    ]

    # Sidebar widgets are created in a fixed order on every run.
    st.sidebar.title("What causes the accidents?")
    select1 = st.sidebar.selectbox("Choose the crash type: ", crash_type)
    select2 = st.sidebar.selectbox(
        "Choose the year: ", [2015, 2016, 2017, 2018, 2019, 2020, 2021])
    st.text("Go back to see all types of causes? Click 'View All'!")

    bars = alt.Chart(source).mark_bar(size=20)

    if st.button("View All"):
        # Stacked bars per year, one segment per cause, segments ordered by cause.
        all_causes = alt.FieldOneOfPredicate(field='CAUSE', oneOf=crash_type)
        stacked = bars.encode(
            alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS)"]),
            alt.Y('YEAR:O',
                  title="Year",
                  axis=alt.Axis(grid=False, labelAngle=0)),
            alt.X('sum(RECORDS)',
                  axis=alt.Axis(grid=False, labelAngle=0),
                  title="Records"),
            color="CAUSE",
            order=alt.Order('CAUSE', sort='ascending'),
        )
        cha = stacked.properties(height=400, width=850).transform_filter(
            all_causes).interactive()
    elif select1 in crash_type:
        only_selected_cause = alt.datum.CAUSE == select1

        # Records per year for the selected cause.
        cha = bars.encode(
            alt.Tooltip(["CAUSE:N", "sum(RECORDS)"]),
            alt.Y('YEAR:O',
                  axis=alt.Axis(grid=False, labelAngle=0),
                  title="Year"),
            alt.X('sum(RECORDS)',
                  axis=alt.Axis(grid=False, labelAngle=0, tickMinStep=1),
                  title="Records"),
            color=alt.value("#e7ba52"),
        ).properties(height=400, width=600).transform_filter(
            only_selected_cause)

        # Every year option above is truthy, so a chosen year replaces the
        # per-year chart with a per-month breakdown for that year.
        if select2:
            cha = bars.encode(
                alt.Tooltip(["YEAR:O", "MONTH:O", "sum(RECORDS)"]),
                alt.Y('MONTH:O',
                      axis=alt.Axis(grid=False, labelAngle=0),
                      title="Month"),
                alt.X('sum(RECORDS)',
                      axis=alt.Axis(grid=False, labelAngle=0, tickMinStep=1),
                      title="Records"),
                color=alt.value("darkgray"),
            ).properties(height=400, width=600).transform_filter(
                only_selected_cause).transform_filter(
                    alt.datum.YEAR == select2)

    return cha
Пример #16
0
          scale=alt.Scale(domain=[1896, 2016]),
          axis=alt.Axis(title=" ", labels=False, values=year_unique)),
    alt.Y(field='Medal',
          type="quantitative",
          stack='zero',
          scale=alt.Scale(domain=[0, 200]),
          axis=alt.Axis(labelFontSize=15, titleFontSize=25)),
    alt.Color('Team:N',
              scale=alt.Scale(scheme='category10'),
              legend=alt.Legend(values=Germany_name,
                                title="Germany",
                                labelFontSize=15,
                                symbolSize=200,
                                titleFontSize=20)),
    opacity=alt.value(0.8)).transform_filter(
        alt.FieldOneOfPredicate(field='NOC', oneOf=Germany)).properties(
            title='Stream graph for 4 groups of countries',
            width=1000,
            height=300).interactive()
Olympic_Germany

# plot for Soviet Union
Olympic_Soviet = alt.Chart(olympic_BC_countYear_filled).mark_area().encode(
    alt.X(field='Year',
          type="quantitative",
          scale=alt.Scale(domain=[1896, 2016]),
          axis=alt.Axis(title=" ", labels=False, values=year_unique)),
    alt.Y(field='Medal',
          type="quantitative",
          stack='zero',
          scale=alt.Scale(domain=[0, 200]),
Пример #17
0
    y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log')),
    color=alt.Color('Country:N',
                    scale=alt.Scale(domain=color_domain, range=color_range),
                    legend=None),
    tooltip=['Country', 'Date', 'Confirmed Cases', 'Days since 100 cases'])

# Drop-down for picking one country outside the baseline set, starting on 'US'.
selectable_countries = sorted(set(countries) - set(baseline_countries))
country_selection = alt.selection_single(
    name='Select',
    fields=['Country'],
    bind=alt.binding_select(options=selectable_countries),
    init={'Country': 'US'},
)

# Keeps rows dated at or after max_date, expressed in epoch milliseconds.
date_filter = alt.datum['Date'] >= int(max_date.timestamp() * 1000)

# Derived views of `base`: baseline countries, the selected country, and the
# selected country restricted by the date filter.
base2 = base.transform_filter(
    alt.FieldOneOfPredicate('Country', baseline_countries))
base3 = base.transform_filter(country_selection)
base4 = base3.transform_filter(date_filter)

# Reference curve: 100 cases growing by a factor of 1.33 per day.
max_day = dff2['Days since 100 cases'].max()
reference_points = [[day, 100 * 1.33**day] for day in range(max_day + 1)]
ref = pd.DataFrame(reference_points,
                   columns=['Days since 100 cases', 'Confirmed Cases'])
base_ref = alt.Chart(ref).encode(
    x='Days since 100 cases:Q',
    y='Confirmed Cases:Q',
)
# Only the last reference point.
base_ref_f = base_ref.transform_filter(
    alt.datum['Days since 100 cases'] >= max_day)

chart5 = (
    base_ref.mark_line(color='black', opacity=.5, strokeDash=[3, 3]) +
    base_ref_f.mark_text(
        dy=-6, align='right', fontSize=10, text='33% Daily Growth') +
Пример #18
0
    data.movies.url,
    format=alt.DataFormat(parse={"Release_Date":"date"})
)
# Category values used by the filters and widgets.
ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R']
genres = [
    'Action', 'Adventure', 'Black Comedy', 'Comedy',
    'Concert/Performance', 'Documentary', 'Drama', 'Horror', 'Musical',
    'Romantic Comedy', 'Thriller/Suspense', 'Western',
]

# Shared scatter base: derived columns first, then row filters, then encodings.
movie_points = alt.Chart(movies, width=200, height=200).mark_point(filled=True)
movie_points = movie_points.transform_calculate(
    Rounded_IMDB_Rating="floor(datum.IMDB_Rating)",
    Hundred_Million_Production="datum.Production_Budget > 100000000.0 ? 100 : 10",
    Release_Year="year(datum.Release_Date)",
)
# Keep rated movies (IMDB rating above 0) with one of the listed MPAA ratings.
movie_points = movie_points.transform_filter(
    alt.datum.IMDB_Rating > 0
).transform_filter(
    alt.FieldOneOfPredicate(field='MPAA_Rating', oneOf=ratings)
)
gross_axis = alt.X('Worldwide_Gross:Q',
                   scale=alt.Scale(domain=(100000, 10**9), clamp=True))
base = movie_points.encode(
    x=gross_axis,
    y='IMDB_Rating:Q',
    tooltip="Title:N",
)

# A slider filter on the derived Release_Year field.
year_slider = alt.binding_range(min=1969, max=2018, step=1)
slider_selection = alt.selection_single(
    bind=year_slider,
    fields=['Release_Year'],
    name="Release Year_",
)


filter_year = base.add_selection(
    slider_selection
).transform_filter(
    slider_selection