Пример #1
0
def get_chart(keyword):
    """Build the news dashboard for ``keyword`` and return it as JSON.

    Reads word-level rows from ``tb_news_covid_mexico_palabras_top_tfidf`` and
    renders three views of them: a point chart of row counts per
    ``string_date``, a bar chart of article counts per ``site`` and a bar
    chart of the highest-ranked words by summed ``n_w``.

    Parameters:
    keyword -- (str) text looked up in ``clean_text`` with a ``LIKE`` match;
               ``"*"`` selects every row without filtering.

    Returns:
    (str) the JSON specification of the concatenated chart.
    """
    # NOTE(review): recent SQLAlchemy releases only accept the
    # "postgresql://" scheme -- confirm against the installed version.
    db_string = "postgres://*****:*****@postgres:5432/shared"
    query = ("select article_id,string_date,site,palabra,n_w "
             "from tb_news_covid_mexico_palabras_top_tfidf")
    if keyword == "*":
        statement = sqlalchemy.text(query)
    else:
        # The keyword is caller-supplied, so it is sent as a bound parameter
        # instead of being spliced into the SQL text (prevents SQL injection
        # and keeps characters such as ':' or "'" from breaking the query).
        query += (" where article_id in (select article_id "
                  "from tb_news_covid_mexico_date_text "
                  "where clean_text LIKE :pattern )")
        statement = sqlalchemy.text(query).bindparams(
            pattern="%" + keyword + "%")
    db = create_engine(db_string)
    df = pd.read_sql_query(statement, db)

    # Number of rows per date.
    chart3 = alt.Chart(df).mark_point().encode(
        y='count()', x='string_date:T').properties(width=900).interactive()

    # Rows are first collapsed to one per (article_id, site) pair, then the
    # pairs are counted per site.
    chart1 = alt.Chart(df).mark_bar().encode(
        x=alt.X('count(article_id):Q'),
        y=alt.Y("site:N",
                sort=alt.EncodingSortField(
                    field="site", op="count",
                    order="descending"))).transform_aggregate(
                        groupby=["article_id", "site"]).properties(height=800)

    # Total n_w per word, ranked in descending order; only ranks below 25
    # are kept.
    chart2 = alt.Chart(df).mark_bar().encode(
        x=alt.X('freq_palabras:Q', aggregate="sum"),
        y=alt.Y(
            "palabra",
            sort=alt.EncodingSortField(
                field="freq_palabras", op="sum",
                order="descending"))).transform_aggregate(
                    freq_palabras='sum(n_w)',
                    groupby=["palabra"],
                ).transform_window(
                    rank='row_number()',
                    sort=[alt.SortField("freq_palabras", order="descending")],
                ).transform_filter(
                    (alt.datum.rank < 25)).properties(height=800)

    return alt.vconcat(chart3, alt.hconcat(chart1, chart2)).to_json()
def make_chart_organisational_diversity(
    org_coeffs,
    num_orgs,
    metric_params,
    org_type_lookup,
    paper_counts,
    save=True,
    fig_num=14,
):
    """Plot comparing the organisational diversity coefficients.

    Args:
        org_coeffs: mapping indexed by ``metric_params`` that yields a table
            with ``beta``, ``lower`` and ``upper`` columns, indexed by
            organisation.
        num_orgs: number of organisations to keep (lowest ``beta`` first).
        metric_params: key selecting the coefficient table in ``org_coeffs``.
        org_type_lookup: mapping from organisation to organisation type.
        paper_counts: series of paper counts indexed by organisation.
        save: when exactly ``True`` the chart is written with ``save_altair``.
        fig_num: figure number used in the saved file name.

    Returns:
        The concatenated Altair chart (coefficients with error bars next to
        the paper counts).
    """

    # Regression coefficients sorted by beta (ascending), first num_orgs rows
    selected = (org_coeffs[metric_params].sort_values("beta").head(
        n=num_orgs).reset_index(drop=False))

    selected["org_type"] = selected["index"].map(org_type_lookup)
    # Explicit rank column so every panel can sort its y axis the same way
    selected["order"] = range(0, len(selected))

    # Paper counts by organisation, in the same order as `selected`
    recent_papers_orgs = (paper_counts.loc[selected["index"]].reset_index(
        name="papers").rename(columns={"index": "org"}))
    recent_papers_orgs["order"] = range(0, len(recent_papers_orgs))
    recent_papers_orgs["org_type"] = recent_papers_orgs["org"].map(
        org_type_lookup)

    b_ch = (alt.Chart(selected).mark_bar().encode(
        y=alt.Y("index", sort=alt.EncodingSortField("order"), title=""),
        x=alt.X("beta", title="Coefficient on diversity"),
        # The colour channel needs alt.Color; the original passed alt.X here.
        color=alt.Color("org_type", title="Organisation type"),
    )).properties(width=150, height=600)

    b_err = (alt.Chart(selected).mark_errorbar().encode(
        y=alt.Y(
            "index",
            sort=alt.EncodingSortField("order"),
            title="",
            axis=alt.Axis(ticks=False, labels=False),
        ),
        x=alt.X("lower", title=""),
        x2="upper",
    )).properties(width=150, height=600)

    b_act = (alt.Chart(recent_papers_orgs).mark_bar().encode(
        y=alt.Y(
            "org",
            title=None,
            sort=alt.EncodingSortField("order"),
            axis=alt.Axis(labels=False, ticks=False),
        ),
        x=alt.X("papers"),
        color="org_type",
    )).properties(width=100, height=600)

    out = (b_ch + b_err).resolve_scale(y="independent")
    out_2 = alt.hconcat(out, b_act, spacing=0).resolve_scale(y="shared")

    if save is True:
        # NOTE(review): `driv` is not defined in this function; presumably a
        # module-level web driver -- confirm.
        save_altair(out_2, f"fig_{fig_num}_comp", driv)

    return out_2
Пример #3
0
def show_single_xstar(x,
                      width=400,
                      height=400,
                      labelFontSize=10,
                      titleFontSize=10,
                      prepare_url_func=None,
                      red_green=True):
    """Draw the square matrix ``x`` as a grid of coloured squares.

    ``x`` is stacked into one row per (i, j) cell. Each label gets a position
    counting down from ``len(x)`` in index order, stored as ``ri`` / ``rj``.

    With ``red_green=True`` cells whose value lies strictly between 0 and 1
    are painted green when ``ri <= rj`` and red when ``ri >= rj``, cells with
    ``i == j`` are black and everything else is white. Otherwise a white-to-
    grey scale is used: ``1 - x`` where ``ri <= rj``, ``x`` where
    ``ri >= rj`` and 1 where ``i == j``.

    ``prepare_url_func``, when given, receives the cell table and its return
    value is what gets charted. Returns the configured Altair chart.
    """
    cells = x.stack().reset_index()
    cells.columns = ["i", "j", "x"]

    # Position of every label: first index entry gets len(x), last gets 1.
    position = pd.Series(np.arange(len(x), 0, -1), index=x.index)
    cells["ri"] = position.loc[cells["i"]].to_numpy()
    cells["rj"] = position.loc[cells["j"]].to_numpy()

    # The masks only depend on x / ri / rj, so they are computed once.
    fractional = (cells["x"] > 0) & (cells["x"] < 1)
    ri_le_rj = fractional & (cells["ri"] <= cells["rj"])
    ri_ge_rj = fractional & (cells["ri"] >= cells["rj"])
    same_label = cells["i"] == cells["j"]

    if red_green:
        cells["c"] = "white"
        cells.loc[ri_le_rj, "c"] = "green"
        cells.loc[ri_ge_rj, "c"] = "red"
        cells.loc[same_label, "c"] = "black"
        color = alt.Color("c:N", scale=None)
    else:
        cells["c"] = 0
        cells.loc[ri_le_rj, "c"] = 1 - cells.loc[ri_le_rj, "x"]
        cells.loc[ri_ge_rj, "c"] = cells.loc[ri_ge_rj, "x"]
        cells.loc[same_label, "c"] = 1
        grey_scale = alt.Scale(domain=[0, 0.0001, 1],
                               range=["white", "white", "grey"])
        color = alt.Color("c", scale=grey_scale)

    if prepare_url_func is None:
        chart_data = cells
    else:
        chart_data = prepare_url_func(cells)

    x_channel = alt.X(
        'i:N',
        axis=alt.Axis(labelOverlap=False, labelFontSize=8),
        title="r",
        sort=alt.EncodingSortField(field="ri", order="descending"),
    )
    y_channel = alt.Y(
        'j:N',
        axis=alt.Axis(labelOverlap=False, labelFontSize=8),
        title="r",
        sort=alt.EncodingSortField(field="rj", order="descending"),
    )

    grid = alt.Chart(chart_data, width=width).mark_square().encode(
        x=x_channel, y=y_channel, color=color)
    grid = grid.properties(width=width, height=height)
    return grid.configure_axis(labelFontSize=labelFontSize,
                               titleFontSize=titleFontSize)
Пример #4
0
def Joint_Prob_plot(df):
    """Build the cohort-driven joint probability dashboard.

    A slider bound to ``cohort`` (from the minimum to the maximum cohort in
    ``df``, starting at the minimum) filters three views of the table built
    by applying ``gen_plot_df`` to every row of ``df``: a heat map of ``MR``
    over ``p`` x ``k`` and two stacked bar charts of ``sum(MR)`` (per ``p``
    and per ``k``). The chart returned by ``people_plot`` is appended below.
    """
    first_cohort = df["cohort"].min()
    last_cohort = df["cohort"].max()
    cohort_slider = alt.binding_range(min=first_cohort,
                                      max=last_cohort,
                                      step=1)
    select = alt.selection_single(name="year",
                                  fields=['cohort'],
                                  bind=cohort_slider,
                                  init={'cohort': first_cohort})

    # One frame per input row, stacked into a single long table.
    per_row_frames = df.apply(gen_plot_df, axis=1)
    plot_df = pd.concat(list(per_row_frames))
    base = alt.Chart(plot_df)

    def k_descending():
        # Shared y-axis definition: k as ordinal, highest value first.
        return alt.Y('k:O', sort=alt.EncodingSortField('k',
                                                       order='descending'))

    def stack_order():
        # Stacking order used by both marginal bar charts.
        return alt.Order(aggregate='sum',
                         type="quantitative",
                         sort='descending')

    def mass_axis(channel):
        # sum(MR) on a fixed 0..1 scale.
        return channel('sum(MR):Q', scale=alt.Scale(domain=(0, 1)))

    # Heat map of the joint probability.
    heat_scale = alt.Scale(type="pow",
                           exponent=0.5,
                           scheme="greens",
                           nice=True)
    joint_chart = base.mark_rect().encode(
        x="p:O",
        y=k_descending(),
        color=alt.Color('MR:Q', scale=heat_scale),
    )
    joint_chart = joint_chart.add_selection(select).transform_filter(select)
    joint_chart = joint_chart.properties(height=200, width=200)

    category_scale = alt.Scale(
        domain=['1', '2', '3', '4', '5'],
        range=['#4c78a8', '#f58518', '#e45756', '#72b7b2', '#54a24b'])

    # Marginal over p, stacked by k.
    p_mag_1 = base.mark_bar().encode(
        x=alt.X("p:O"),
        y=mass_axis(alt.Y),
        color=alt.Color('k:O', scale=category_scale, legend=None),
        order=stack_order(),
    )
    p_mag_1 = p_mag_1.add_selection(select).transform_filter(select)
    p_mag_1 = p_mag_1.properties(height=150, width=200)

    # Marginal over k, stacked by p.
    k_mag_1 = base.mark_bar().encode(
        y=k_descending(),
        x=mass_axis(alt.X),
        color=alt.Color('p:O',
                        scale=category_scale,
                        sort=alt.EncodingSortField('p', order='ascending'),
                        legend=None),
        order=stack_order(),
    )
    k_mag_1 = k_mag_1.add_selection(select).transform_filter(select)
    k_mag_1 = k_mag_1.properties(height=200, width=150)

    people_c = people_plot(df, select).properties(width=300, height=300)

    dashboard = p_mag_1 & (joint_chart | k_mag_1) & people_c
    return dashboard.resolve_scale(color='independent')
Пример #5
0
def plot_err_heatmap(bet_results, mask_results):
    r"""Render the error matrix as a grey-scale heat map in Streamlit.

    The error values are rounded to two decimals, entries selected by
    ``mask_results.mask`` are set to 0, and the colour domain spans the
    minimum and maximum of the unmasked entries. Both axes carry the
    relative pressures from ``bet_results.iso_df.relp``.
    """
    relp = bet_results.iso_df.relp
    start_grid, end_grid = np.meshgrid(relp, relp)

    masked = mask_results.mask
    err = np.round(bet_results.err.copy(), 2)
    err[masked] = 0
    unmasked = err[~masked]
    dmin = np.amin(unmasked)
    dmax = np.amax(unmasked)

    source = pd.DataFrame({
        "Start relative pressure": start_grid.ravel(),
        "End relative pressure": end_grid.ravel(),
        "Error": err.ravel(),
    })

    # Both pressure axes share the same tick / label settings.
    axis_options = dict(tickMinStep=2,
                        tickCount=10,
                        labelSeparation=5,
                        format=",.2r")
    start_channel = alt.X(
        "Start relative pressure:O",
        sort=alt.EncodingSortField("Start relative pressure",
                                   order="ascending"),
        axis=alt.Axis(**axis_options),
    )
    end_channel = alt.Y(
        "End relative pressure:O",
        sort=alt.EncodingSortField("End relative pressure",
                                   order="descending"),
        axis=alt.Axis(**axis_options),
    )
    error_color = alt.Color("Error:Q",
                            scale=alt.Scale(domain=[dmin, dmax],
                                            scheme="Greys"))

    hmap = alt.Chart(source).mark_rect(stroke="gray", strokeWidth=0.5).encode(
        x=start_channel,
        y=end_channel,
        color=error_color,
        tooltip=["Error", "Start relative pressure", "End relative pressure"],
    )
    hmap = hmap.configure_view(strokeWidth=0)
    hmap = hmap.configure_scale(bandPaddingInner=0.15)
    hmap = hmap.configure_axis(
        labelFontSize=axis_label_size,
        titleFontSize=axis_title_size,
        domainColor="white",
    )
    hmap = hmap.properties(title="Error", height=600, width=670)
    hmap = hmap.configure_title(fontSize=figure_title_size)
    hmap = hmap.configure_legend(
        padding=10,
        strokeColor="white",
        cornerRadius=10,
        labelFontSize=legend_label_size,
        titleFontSize=legend_title_size,
        gradientLength=250,
        tickCount=5,
        offset=40,
    )
    hmap = hmap.interactive()

    st.altair_chart(hmap, use_container_width=True)
Пример #6
0
def show_chart(data,items):
    """Render the three Streamlit sections for the metro-area statistics.

    data  -- dataframe holding 'year', 'cbsa_area', 'Metro area', the fixed
             statistic columns listed in ``labels`` below and one column per
             entry of ``items``
    items -- mapping of item column name to a value; only the entries whose
             value is not 0 are charted
    """
    # Column name -> label shown on axes and tooltips.
    labels = {'rank':'Rank','All Employees':'All Employees (1K)','Avg Hourly Wages':'Avg Hourly Wages','eWage':'Pricing Power','score':'Employment Strengths','eCPI':'Price Index'}
    stat_columns = list(labels)
    selected_items = [name for name, flag in items.items() if flag!=0]

    data = data[['year','cbsa_area','Metro area']+stat_columns+selected_items]

    def labelled_tooltip(column, **extra):
        # Tooltip for a statistic column, titled with its display label.
        return alt.Tooltip(column, title=labels[column], **extra)

    st.subheader('Employment strengths')

    # Employment strengths scatter: employees vs. hourly wages, sized by score
    scatter = alt.Chart(data).mark_circle().encode(
        x=alt.X('All Employees',title=labels['All Employees']),
        y=alt.Y('Avg Hourly Wages',title=labels['Avg Hourly Wages']),
        size=alt.Size('score',legend=None),
        tooltip=[
            alt.Tooltip('Metro area'),
            labelled_tooltip('rank'),
            labelled_tooltip('All Employees'),
            labelled_tooltip('Avg Hourly Wages',format='$'),
        ],
    )
    st.altair_chart(scatter,use_container_width=True)

    st.subheader('Price Index')

    # Price Index stacked bar: the selected item columns are folded into
    # (Item, Price Index) pairs and stacked per metro area
    stacked_cpi = alt.Chart(data).transform_fold(
        selected_items,['Item','Price Index']
    ).mark_bar().encode(
        x=alt.X('Price Index:Q'),
        y=alt.Y('Metro area:N',sort=alt.EncodingSortField(field='eCPI',order='descending')),
        color=alt.Color('Item:N'),
        tooltip=[
            alt.Tooltip('Metro area'),
            alt.Tooltip('Item:N'),
            alt.Tooltip('Price Index:Q'),
            labelled_tooltip('eCPI'),
        ],
    )
    st.altair_chart(stacked_cpi,use_container_width=True)

    st.subheader('Pricing Power')

    # Pricing power bar chart, metro areas sorted by eWage (descending)
    stacked_pp = alt.Chart(data).mark_bar().encode(
        x=alt.X('eWage',title=labels['eWage']),
        y=alt.Y('Metro area:N',sort=alt.EncodingSortField(field='eWage',order='descending')),
        tooltip=[
            alt.Tooltip('Metro area'),
            labelled_tooltip('Avg Hourly Wages'),
            labelled_tooltip('eCPI'),
            labelled_tooltip('eWage'),
        ],
    )
    st.altair_chart(stacked_pp,use_container_width=True)
Пример #7
0
def plot_feature_importance(data, importance_plot_path):
    """
    Saves a bar chart of feature importances with one row facet per model

    Parameters:
    data -- (dataframe) Importance dataframe with a 'Features' column; every
            other column is treated as one model's importances
    importance_plot_path -- (string) path the chart is saved to
    """
    # Wide -> long: one (Features, Model, Importance) row per bar.
    long_importances = data.melt(id_vars=['Features'],
                                 var_name='Model',
                                 value_name='Importance')

    feature_axis = alt.Y('Features:O',
                         sort=alt.EncodingSortField(field='Importance',
                                                    order='descending'),
                         axis=alt.Axis(title='Features', tickCount=8))

    bars = alt.Chart(long_importances).mark_bar().encode(
        alt.X('Importance:Q'),
        feature_axis,
        alt.Color('Model:N'),
        alt.Row('Model:N'),
    )
    bars = bars.properties(
        width=500,
        height=1000,
        title='Feature Importance using Different Models',
    )
    bars = bars.configure_axis(labelFontSize=15, titleFontSize=22)
    bars = bars.configure_title(fontSize=26)

    bars.save(importance_plot_path)
Пример #8
0
def squares(proba, actual, predicted):
    """Chart how the score given to the actual class is distributed.

    For every row only the probability in the column matching its actual
    class is kept. Those scores are cut into ten bins of width 0.1 and
    counted per (bin, class column, actual, predicted) combination.

    Parameters:
    proba -- per-class scores, one column per class (column labels must
             match the values in ``actual``)
    actual -- actual class of every row
    predicted -- predicted class of every row

    Returns:
    An Altair bar chart faceted by actual class and coloured by prediction.
    """
    df = pd.DataFrame(proba, copy=True)
    metadata = pd.DataFrame({'predicted': predicted, 'actual': actual})
    for col in df.columns:
        # Single .loc assignment instead of the chained df[col][mask] = ...,
        # which writes to a temporary (and is a no-op) under copy-on-write.
        df.loc[metadata['actual'] != col, col] = None

    df['predicted'] = predicted
    df['actual'] = actual
    # melt() and the chart expect string column names
    df.columns = df.columns.astype(str)

    df = df.melt(id_vars=['predicted', 'actual'])

    # Bin edges 0.0, 0.1, ..., 1.0
    bins = [b / 10 for b in range(11)]
    df['bins'] = pd.cut(df['value'], bins=bins, include_lowest=True)
    b = pd.DataFrame({
        'count':
        df.groupby(['bins', 'variable', 'actual', 'predicted'])['bins'].size()
    }).reset_index()
    # Interval objects are not JSON serialisable; use their text form
    b['bins'] = b['bins'].astype(str)

    chart = alt.Chart(b, title='').mark_bar().encode(
        x=alt.X('count', axis=alt.Axis(ticks=False, labels=False, title='')),
        y=alt.Y('bins:N',
                title='Prediction Score',
                sort=alt.EncodingSortField(field='bins',
                                           op='sum',
                                           order='descending')),
        column=alt.Column('actual', title=''),
        color='predicted:N',
        tooltip=['count', 'predicted']).properties(width=100).configure_axis(
            grid=False).configure_view(strokeOpacity=0)

    return chart
Пример #9
0
 def bar_chart(self, dataframe, order='descending', height=300):
     """Return a horizontal bar chart with data labels.

     Parameters:
     dataframe -- Spark dataframe with exactly two columns; the second
                  column must hold a numerical variable
     order -- 'descending' (default) or 'ascending'; how the bars are sorted
     height -- final height of the chart (default 300)

     Rows with a null in the second column are left out of the chart.
     """
     import altair as alt

     # Drop nulls in the value column, then move to pandas because Altair
     # charts are built from pandas dataframes.
     non_null = dataframe.dropna(how='any', subset=dataframe.columns[1])
     pandas_df = non_null.toPandas()
     category_col = pandas_df.columns[0]
     value_col = pandas_df.columns[1]

     # Categories on the y axis, sorted by the sum of the value column.
     category_axis = alt.Y(category_col,
                           sort=alt.EncodingSortField(field=value_col,
                                                      op='sum',
                                                      order=order))
     bars = alt.Chart(pandas_df).mark_bar().encode(x=value_col,
                                                   y=category_axis)

     # Data labels; dx is the distance between label and bar.
     labels = bars.mark_text(align='left', baseline='middle', dx=3)
     labels = labels.encode(text=value_col)

     layered = bars + labels
     return layered.properties(height=height)
Пример #10
0
def showBar_alt(data,y='Life expectancy ',nums=10,year=2000,height=600,width=800,title=None):
    """
    show bar plot of life expectancy in a given year for the top nums countries
    
    params:
        data, original data, pd.dataframe (must have a 'Year' column)
        y, feature name, str (only validated here; the chart always plots
           the 'Life expectancy ' column)
        nums, the number of countries we want to present, int
        year, the year we want to extract data and show from, int
        height, the height of the graph we want to plot, int
        width, the width of the graph we want to plot, int
        title, optional chart title
    """
    
    assert isinstance(data,pd.core.frame.DataFrame)
    assert isinstance(y,str)
    assert isinstance(nums,int)
    assert isinstance(year,int) and data.Year.min() <= year <= data.Year.max()  
    assert isinstance(height,int) and isinstance(width,int)
    
    # NOTE(review): sortdata is defined elsewhere; presumably it returns the
    # rows for `year` limited to `nums` countries with a 'rank' column.
    newdata=sortdata(data,year,nums)
    
    # The column name ends with a space and uses a lower-case "e"; the sort
    # field must match it exactly or it refers to a non-existent field.
    value_column='Life expectancy '
    
    return alt.Chart(newdata,title=title).mark_bar().encode(
    x=alt.X(value_column, axis=alt.Axis(title='Life Expectancy')),
    y=alt.Y('Country', sort=alt.EncodingSortField(field=value_column, op='sum', order='ascending')),
    color='rank',
    ).properties(
    height=height,
    width=width
    ).configure_axis(titleFontSize=15)
Пример #11
0
def to_predictions_chart(sortedproba) -> alt.Chart:
    """Bar chart of the first five rows of ``sortedproba``.

    Probabilities are drawn on a fixed 0-100 axis and the breed names are
    ordered by descending probability.
    """
    top_five = sortedproba[:5]
    probability_axis = alt.X("probability:Q",
                             scale=alt.Scale(domain=(0, 100)))
    breed_axis = alt.Y("dog breed name:N",
                       sort=alt.EncodingSortField(field="probability",
                                                  order="descending"))
    return alt.Chart(top_five).mark_bar().encode(x=probability_axis,
                                                 y=breed_axis)
Пример #12
0
    def plot_bar_total_count(selection):
        """Horizontal bar chart of the squirrel count per park region.

        ``selection`` is added to the chart and drives the bar opacity:
        selected bars are fully opaque, the others are drawn at 0.2.
        """
        count_bar = (
            alt.Chart(alt_base_layer_data_count,
                      title='Squirrel Count by Park Region').mark_bar().
            add_selection(selection).encode(
                # Each channel uses its own class (the original built the
                # x channel with alt.Y and the y channel with alt.X).
                x=alt.X('properties.Unique_Squirrel_ID:Q',
                        title="Squirrel Count",
                        axis=alt.Axis(labelFontSize=16, titleFontSize=20)),
                y=alt.Y('properties.sitename_short:N',
                        title="Park Region",
                        axis=alt.Axis(labelFontSize=12, titleFontSize=20),
                        # A sort field is a plain field name: the ":Q" type
                        # shorthand is not parsed here and would make the
                        # sort refer to a field that does not exist.
                        sort=alt.EncodingSortField(
                            field="properties.Unique_Squirrel_ID",
                            order="descending")),
                color=alt.Color('properties.Unique_Squirrel_ID:Q',
                                scale=alt.Scale(scheme='greens')),

                # SELECTION SINGLE CONDITIONS -- opacity is 0.2 if not selected
                opacity=alt.condition(selection, alt.value(1.0),
                                      alt.value(0.2)),
                tooltip=[
                    alt.Tooltip('properties.sitename:N', title="Park Region"),
                    alt.Tooltip('properties.Unique_Squirrel_ID:Q',
                                title="Squirrel Count")
                ]).properties(width=pw, height=ph))
        return count_bar
Пример #13
0
def make_bars(data, eda_file_path):
    """
    Creates bar plots of average stats vs. the response variable.

    One chart is saved per numerical feature, showing the mean of that
    feature for every 'Tier_2' value.

    Parameters:
    data -- (dataframe) The training data
    eda_file_path -- (str) Prefix of the saved files; each plot is written to
                     '<eda_file_path><feature>.png'
    """

    numerical_features = [
        'HP', 'Attack', 'Defense', 'Special_attack', 'Special_defense', 'Speed'
    ]

    # Iterate over the feature names directly instead of indexing by position.
    for feature in numerical_features:
        tier_means = (data[['Tier_2', feature]]
                      .groupby(by='Tier_2')
                      .mean()
                      .reset_index())

        chart = alt.Chart(tier_means).mark_bar().encode(
            alt.X(feature),
            alt.Y('Tier_2:O',
                  sort=alt.EncodingSortField(field=feature,
                                             order="descending"),
                  axis=alt.Axis(title="Tier classification", tickCount=8))
        ).properties(
            width=500,
            height=500,
            title=f'Mean {feature} by Tier').configure_axis(
                labelFontSize=15,
                titleFontSize=22).configure_title(fontSize=26)

        chart.save('{}{}.png'.format(eda_file_path, feature))
        print(f"{feature}.png saved in {eda_file_path}")
Пример #14
0
def make_altair_chart(country):
    """Bar chart of the hour buckets for one country.

    Filters the module-level ``df1`` to the rows whose 'Country' equals
    ``country`` and colours every bar by 'share_within_country'.
    """
    chart = alt.Chart(df1[df1['Country'] == country]).mark_bar().encode(
        # The sort field is a plain column name; a ":Q" suffix is not parsed
        # there and would point the sort at a non-existent field.
        alt.Y('Country:N', sort=alt.EncodingSortField(field="installs", op="sum", order='descending')),
        alt.X('hours_bucket:O'),
        alt.Color('share_within_country:Q', scale=alt.Scale(scheme='greenblue'))
        )
    return chart
Пример #15
0
def create_dotted_chart(df: pd.DataFrame,
                        color_attribute: str,
                        x_attr: str,
                        y_attr: str,
                        y_sort: str,
                        tooltip: Optional[List] = None) -> alt.Chart:
    """Return an interactive dotted chart of ``df``.

    Every row becomes a circle placed at ``x_attr`` (temporal) on the x axis
    and ``y_attr`` (ordinal, ordered by ``y_sort``) on the y axis, coloured
    by ``color_attribute`` without a legend. ``tooltip`` is forwarded to the
    tooltip channel unchanged.
    """
    dots = alt.Chart(df).mark_circle(opacity=0.8, size=100)

    time_axis = alt.X(f"{x_attr}:T")
    row_axis = alt.Y(f"{y_attr}:O",
                     axis=alt.Axis(labelAngle=90),
                     sort=alt.EncodingSortField(field=y_sort))
    dot_color = alt.Color(color_attribute, legend=None)

    dots = dots.encode(time_axis, row_axis, color=dot_color, tooltip=tooltip)
    dots = dots.properties(width=1000, height=800)
    return dots.interactive()
def draw_feature_importance_tornado(feat_imp_df):
    """Return a horizontal bar chart of feature importances.

    ``feat_imp_df`` needs a ``feature`` and an ``importance`` column. Bars
    are ordered by descending importance and shaded on a log colour scale.
    """
    feat_imp_bars = alt.Chart(
        feat_imp_df, width=1000, height=500,
        title="Feature Importances").mark_bar().encode(
            alt.X('importance:Q', title="Importance"),
            alt.Y('feature:N',
                  title="Feature",
                  sort=alt.EncodingSortField(field="importance",
                                             op="sum",
                                             order='descending')),
            alt.Color('importance:Q',
                      legend=None,
                      scale=alt.Scale(type="log",
                                      range=["lightskyblue", "steelblue"])),
        )

    # NOTE(review): the original also built a text layer showing the
    # importance values (mark_text, align left, dx=3) but never added it to
    # the returned chart. The dead local is removed here; layering it in
    # would change the returned chart type, so confirm with callers before
    # restoring the labels.
    return feat_imp_bars.configure_axis(
        labelFontSize=12, titleFontSize=20, labelFlush=False,
        labelLimit=400).configure_title(fontSize=30)
Пример #17
0
def html_ca_entreprise():
    """Chart each company's share of the signed revenue and save it as HTML.

    Quotes with ``quote_status == 'signed'`` are grouped by company
    (``ht_amount`` summed, quotes counted); the chart shows every company's
    share of the total amount. The chart is written to
    'app/templates/plot/repartition_ca_entreprise.html' and also returned.
    """
    df = create_df_quote_company()
    df_signed = df[df.quote_status == 'signed']
    # Aggregations are named by string; passing the builtin `sum` relies on
    # a deprecated pandas substitution.
    df_signed_company = df_signed.groupby('company_name').agg({
        'ht_amount': 'sum',
        'quote_status': 'count',
    }).reset_index()
    # Item assignment: attribute-style assignment only works for columns
    # that already exist and is easy to get silently wrong.
    df_signed_company['company_name'] = (
        df_signed_company['company_name'].str.title())
    plot = alt.Chart(
        df_signed_company, title="Part du chiffre d'affaire par entreprise"
    ).transform_joinaggregate(
        # Grand total attached to every row so the share can be computed
        TotalAmount='sum(ht_amount)',
    ).transform_calculate(
        PercentOfTotal="datum.ht_amount / datum.TotalAmount"
    ).mark_bar().encode(
        x=alt.X('PercentOfTotal:Q',
                axis=alt.Axis(format='.0%'),
                title='Pourcentage du CA'),
        y=alt.Y('company_name:N',
                title='Nom entreprises',
                sort=alt.EncodingSortField(field="PercentOfTotal",
                                           order='descending')),
        tooltip=[
            alt.Tooltip('sum(ht_amount)', title='Sum Montant HT€'),
            alt.Tooltip('quote_status', title='Nbr contrat')
        ])
    plot.save('app/templates/plot/repartition_ca_entreprise.html')
    return plot
Пример #18
0
def homeAdv():
    """Return the home-advantage chart as a JSON string.

    Reads "App/Data/homeadvantage.csv" and draws, for every league (one row
    facet each, ordered by the maximum of ``points``, descending), the
    points of the 'Home Team' and 'Away Team' bars.
    """
    adv = pd.read_csv("App/Data/homeadvantage.csv")

    team_colors = alt.Scale(domain=['Home Team', 'Away Team'],
                            range=["#5bc0de", "#d9534f"])

    points_axis = alt.X('points:Q', title='Average points')
    # NOTE(review): sort='-y' on the y channel sorts the channel by itself;
    # '-x' (sort by points) may have been intended -- confirm.
    team_axis = alt.Y('team_flag:N',
                      sort='-y',
                      title='',
                      axis=alt.Axis(labels=False))
    team_color = alt.Color('team_flag:N', scale=team_colors, title='')
    league_rows = alt.Row('league:N',
                          title='',
                          sort=alt.EncodingSortField("points",
                                                     op='max',
                                                     order='descending'),
                          header=alt.Header(labelAngle=0, labelAlign='left'))

    bars = alt.Chart(adv, height=500, width=1000).mark_bar().encode(
        x=points_axis,
        y=team_axis,
        color=team_color,
        row=league_rows,
        tooltip=[alt.Tooltip('points:Q', format='.2f')],
    )
    # The per-facet height set here replaces the 500 given above.
    bars = bars.properties(height=25)
    bars = bars.configure_view(stroke='transparent')
    bars = bars.configure_axis(grid=False)

    return bars.interactive().to_json()
Пример #19
0
def make_chart(data, days):
    """
    Make an Altair bar chart of the number of shootings per neighborhood.

    Only the "ZillowName" and "fatal" columns of ``data`` are used; ``days``
    is interpolated into the chart title.
    """
    shootings = data[["ZillowName", "fatal"]]

    # Neighborhoods ordered by how many rows they have, largest first.
    neighborhood_axis = alt.Y(
        "ZillowName:N",
        title="Neighborhood",
        sort=alt.EncodingSortField(op="count", order="descending"),
    )
    count_axis = alt.X("count()", title="Number of Shootings")
    hover = [
        alt.Tooltip("count()", title="Number of Shootings"),
        alt.Tooltip("ZillowName", title="Neighborhood"),
        alt.Tooltip("fatal", title="Fatal?"),
    ]

    bars = alt.Chart(shootings).mark_bar().encode(
        y=neighborhood_axis,
        x=count_axis,
        color=alt.Color("fatal:N", title="Fatal?"),
        tooltip=hover,
    )
    return bars.properties(
        width=400,
        height=800,
        title="Shootings in the Last %d Days by Neighborhood" % days,
    )
Пример #20
0
def Area_markLine(df,Title,YTitle,Color1): # ------- df -> Line
    """Line chart of the monthly average of ``ValueAll``, one line per ``Color1``.

    df     -- dataframe with 'Date', 'ValueAll' and the ``Color1`` column
    Title  -- chart title
    YTitle -- y axis title
    Color1 -- name of the column that colours the lines; it is also the
              field of the legend-bound multi selection

    Lines that are not selected in the legend are drawn at 0.2 opacity.
    """
    selection = alt.selection_multi(fields=[Color1], bind='legend')

    plot = alt.Chart(df).mark_line(interpolate='basis'
        ).encode(
            alt.X('month(Date):N', axis=alt.Axis(labelFontSize=10)),
            alt.Y('average(ValueAll):Q', axis=alt.Axis(labelFontSize=15,title=YTitle)),
            # The sort field is a plain column name and the aggregate goes in
            # `op`; the original passed the shorthand 'average(ValueAll):O',
            # which names a field that does not exist.
            color=alt.Color(Color1,sort=alt.EncodingSortField('ValueAll', op='mean', order='descending')),
            # Opacity is limited to [0, 1]; the original value of 2 was out
            # of range.
            opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        ).add_selection(
            selection
        ).properties(
            title=Title,
            height=400,width=600,
        ).configure_axis(
            labelFontSize=15,
            titleFontSize=15
        ).configure_axisX(
            labelAngle=-45,
        ).configure_title(
            fontSize=24
        ).configure_legend(
            titleColor='black', 
            titleFontSize=15,
            labelFontSize=15,
        )
    return plot
def crime_bar_chart(df):
    """
    Create the bar chart to display top ten crimes/ selected crimes  

    Only the ten most frequent 'OFFENSE_CODE_GROUP' values in ``df`` are
    kept; the bars are ordered by descending count.
    
    Parameters
    ----------
    df : 
        wrangled dataframe to produce the bar chart

    Returns
    -------
    altair plot :
        altair bar plot 
    """

    # Ten most frequent offense groups
    top_groups = df.groupby('OFFENSE_CODE_GROUP').size().sort_values(
        ascending=False)[:10]
    df = df[df['OFFENSE_CODE_GROUP'].isin(top_groups.index)]

    crime_type_chart = alt.Chart(df).mark_bar().encode(
        # Each channel uses its own class (the original built the y channel
        # with alt.X and the x channel with alt.Y).
        y=alt.Y('OFFENSE_CODE_GROUP:O',
                title="Crime",
                sort=alt.EncodingSortField(op="count", order='descending')),
        x=alt.X('count():Q', title="Crime Count"),
        tooltip=[
            alt.Tooltip('OFFENSE_CODE_GROUP:O', title='Crime'),
            alt.Tooltip('count():Q', title='Crime Count')
        ]).properties(title="Crime Count by Type", width=250, height=250)
    return crime_type_chart
def visualize(emotion_array):
    '''Visualize the sentiment analysis using altair
       Parameters: DataFrame: emotion_array, the dataframe about emotions of
                   each critic (columns EMOTION, PERCENT and name are used)
       Returns: BarChart: a faceted bar chart with one panel per name; it is
                also saved to 'finalchart.html'
       Side effect: enables the altair 'notebook' renderer'''
    alt.renderers.enable('notebook')

    emotion_options = [
        'positive', 'negative', 'anger', 'anticipation', 'disgust', 'fear',
        'joy', 'sadness', 'surprise', 'trust'
    ]
    # Dropdown that picks one emotion; the other bars turn light gray.
    dropdown = alt.binding_select(options=emotion_options)
    selection = alt.selection_single(fields=['EMOTION'],
                                     bind=dropdown,
                                     name='Type of')
    highlight = alt.condition(selection, alt.Color('EMOTION:N', legend=None),
                              alt.value('lightgray'))

    emotion_axis = alt.Y('EMOTION:N',
                         sort=alt.EncodingSortField(field='PERCENT',
                                                    op='count',
                                                    order='ascending'))
    bars = alt.Chart(emotion_array).mark_bar().encode(
        x='PERCENT:Q',
        y=emotion_axis,
        facet='name:N',
        color=highlight,
        tooltip='PERCENT:Q',
    )
    bars = bars.properties(
        width=150,
        height=150,
        title='Sentiment for Top 5 Critics',
        columns=5,
    )
    chart_final = bars.add_selection(selection)

    chart_final.save('finalchart.html')
    return chart_final
Пример #23
0
 def to_predictions_chart(predictions, ) -> alt.Chart:
     """Bar chart of the (prediction, probability) pairs shown to the user.

     ``predictions`` is turned into a table with the columns "id",
     "prediction" and "probability"; probabilities are rounded to two
     decimals and scaled by 100 before being drawn on a 0-100 axis.
     """
     column_names = ["id", "prediction", "probability"]
     table = pd.DataFrame(predictions, columns=column_names)
     table["probability"] = table["probability"].round(2) * 100

     probability_axis = alt.X("probability:Q",
                              scale=alt.Scale(domain=(0, 100)))
     # Predictions listed from most to least probable.
     prediction_axis = alt.Y("prediction:N",
                             sort=alt.EncodingSortField(field="probability",
                                                        order="descending"))

     return alt.Chart(table).mark_bar().encode(x=probability_axis,
                                               y=prediction_axis)
Пример #24
0
def make_bar_plot(category, damage):
    """
    Generates a bar plot based on user selection of category and damage level

    Parameters
    ----------
    category - a string; column name selected from dropdown and passed by
        callback. Used as the x encoding and (prettified) in the titles.
    damage - a string or an iterable of strings; the damage level(s) selected
        from dropdown and passed by callback. None, an empty string or an
        empty iterable all mean "nothing selected".

    Return
    ------
    an altair plot converted to html, or None when no damage level is selected
    """

    alt.themes.register('mds_special', mds_special)
    alt.themes.enable('mds_special')
    # Use alt.themes.enable('none') to return to the default theme.

    x_title = category.replace('_', ' ').title()
    main_title = 'Effect of ' + x_title + ' on Birdstrikes'

    # Normalise the selection to a list. A bare string is treated as a single
    # level instead of being iterated character by character.
    if damage is None:
        selected_levels = []
    elif isinstance(damage, str):
        selected_levels = [damage] if damage else []
    else:
        selected_levels = list(damage)

    if not selected_levels:
        return None

    # Filter with isin() instead of a string-built query expression: the
    # selected values arrive from a callback and must not be spliced into an
    # expression that pandas evaluates (a stray quote would break or alter it).
    selected_rows = df[df['damage_level'].isin(selected_levels)]

    #generate a bar plot
    bar_plot = alt.Chart(
        selected_rows,
        title=main_title).mark_bar(opacity=0.3).encode(
            alt.X(category + ':O',
                  axis=alt.Axis(title=x_title, labelAngle=0),
                  sort=alt.EncodingSortField(field='damage_level_sort',
                                             op='count',
                                             order='ascending')),
            alt.Y('count(damage_level):Q',
                  axis=alt.Axis(title="Bird Strikes"),
                  stack=True),
            alt.Color(
                'damage_level',
                scale=alt.Scale(
                    domain=['Substantial', 'Medium', 'Minor', 'None'],
                    range=['red', 'dodgerblue', 'grey', 'darkgreen']),
                legend=alt.Legend(orient='bottom',
                                  titleOrient='left',
                                  title="Damage Level",
                                  labelFontSize=15,
                                  titleFontSize=15)),
            # Stack order follows the damage_level_sort column.
            alt.Order('damage_level_sort', sort='ascending'),
            alt.Tooltip(['count(damage_level)'])).properties(width=500,
                                                             height=400)

    return bar_plot.to_html()
def get_interactive_proportions_plot(gender_balance):
    """Return standalone HTML for a linked line chart and bar chart.

    ``gender_balance`` is the key used to pick a frame from the module-level
    ``data_frames`` mapping; it is also inserted into the bar chart title.

    The left panel plots ``female_prop`` per ``year`` with one line per
    ``job`` and shows the value nearest to the mouse cursor. The right panel
    shows ``total_prop_female`` per ``job``; selecting bars filters the lines
    on the left and greys out the unselected bars.
    """
    source = data_frames[gender_balance]

    # Created first so the auto-generated selection names keep their order.
    pts = alt.selection(type="multi", encodings=['x'])

    # --- left panel: proportion of women over time --------------------------
    year_axis = alt.X('year:O', title='Year')
    share_axis = alt.Y('female_prop:Q',
                       title="Proportion of Women",
                       axis=alt.Axis(format='%'),
                       scale=alt.Scale(domain=[0, 1]))
    lin = alt.Chart(source).mark_line().encode(
        year_axis, share_axis, alt.Color('job:N', legend=None))
    lin = lin.transform_filter(pts)
    lin = lin.properties(width=500,
                         height=375,
                         title="Proportion of Women by Year")

    label = alt.selection_single(
        encodings=['x'],  # limit selection to x-axis value
        on='mouseover',  # select on mouseover events
        nearest=True,  # select data point nearest the cursor
        empty='none'  # empty selection includes no data points
    )

    # Vertical guide at the hovered year.
    hover_rule = alt.Chart().mark_rule(color='#aaa').encode(x='year:O')
    hover_rule = hover_rule.transform_filter(label)

    # Points are always present (opacity 0) so the selection has targets.
    point_opacity = alt.condition(label, alt.value(1), alt.value(0))
    hover_points = lin.mark_circle().encode(opacity=point_opacity)
    hover_points = hover_points.add_selection(label)

    # White outlined copy drawn underneath the text to keep it legible.
    text_outline = lin.mark_text(align='left',
                                 dx=5,
                                 dy=-5,
                                 stroke='white',
                                 strokeWidth=2)
    text_outline = text_outline.encode(
        text=alt.Text('female_prop:Q', format='.2%'))
    text_outline = text_outline.transform_filter(label)

    text_value = lin.mark_text(align='left', dx=5, dy=-5)
    text_value = text_value.encode(
        text=alt.Text('female_prop:Q', format='.2%'))
    text_value = text_value.transform_filter(label)

    lin_w_interaction = alt.layer(lin,
                                  hover_rule,
                                  hover_points,
                                  text_outline,
                                  text_value,
                                  data=source)

    # --- right panel: jobs ranked by proportion of women ---------------------
    job_axis = alt.Y('job:N',
                     title='',
                     sort=alt.EncodingSortField(field="total_prop_female",
                                                op="sum",
                                                order="descending"))
    total_axis = alt.X('total_prop_female:Q',
                       title="Proportion of Women",
                       axis=alt.Axis(format='%'))
    bar_colour = alt.condition(pts, alt.Color('job:N', legend=None),
                               alt.ColorValue("grey"))
    bar_title = ("Jobs by Proportion of Women (For the 10 most " +
                 gender_balance + " jobs)")
    bar = alt.Chart(source).mark_bar(size=30).encode(y=job_axis,
                                                     x=total_axis,
                                                     color=bar_colour)
    bar = bar.properties(width=250, height=375, title=bar_title)
    bar = bar.add_selection(pts)

    combined = alt.hconcat(lin_w_interaction, bar)
    combined = combined.resolve_legend(color="independent",
                                       size="independent")
    interactive_job_chart = combined.configure_axis(labelFontSize=13,
                                                    titleFontSize=14)

    # Render into an in-memory buffer rather than a file on disk.
    html_buffer = io.StringIO()
    interactive_job_chart.save(html_buffer, 'html')
    return html_buffer.getvalue()
Пример #26
0
def bestTeamPlot(year, top):
    """Chart wins, losses and draws for five teams of one season.

    Reads ``App/Data/CumulativeSeasons.csv``, keeps the rows whose ``season``
    equals ``"<2000 + year>/<2001 + year>"``, ranks the teams by their number
    of ``'won'`` rows and keeps five of them. As a side effect the summary
    table is written to ``best.csv`` in the current working directory.

    Parameters
    ----------
    year : int
        Season start year as an offset from 2000 (15 -> "2015/2016").
    top : bool
        Truthy selects the five teams with the most wins, falsy the five
        teams with the fewest wins.

    Returns
    -------
    str
        The chart serialised with ``to_json()``.

    Raises
    ------
    KeyError
        If the selected season contains no ``'won'`` rows.
    IndexError
        If the selected season has fewer than three distinct result values.
    """
    all_seasons = pd.read_csv("App/Data/CumulativeSeasons.csv")

    season_label = f"{year + 2000}/{year + 2001}"
    season_rows = all_seasons[all_seasons['season'] == season_label]

    wins_by_team = season_rows.groupby(['result',
                                        'team_long_name']).size()['won']
    chosen = wins_by_team.sort_values(ascending=not top).iloc[:5]
    teamList = chosen.index.tolist()

    # Row counts are divided by this to get match counts.
    # NOTE(review): presumably the CSV holds one row per player per match --
    # confirm against the data source.
    num_players = 11

    # One groupby for the whole season instead of three per team. unstack()
    # gives every team a cell for every result (0 when absent), so a team
    # without e.g. any draw no longer shifts the positions of its other
    # results or raises.
    outcome_rows = season_rows.groupby(['team_long_name',
                                        'result']).size().unstack(fill_value=0)
    outcome_matches = (outcome_rows.loc[teamList] / num_players).astype(int)

    # Result columns are sorted by label; positions 0/1/2 are read as
    # draw/lost/won, exactly as before.
    # NOTE(review): assumes the result labels sort as draw < lost < won --
    # confirm against the data.
    df_draw = outcome_matches.iloc[:, 0].tolist()
    df_lost = outcome_matches.iloc[:, 1].tolist()
    df_won = outcome_matches.iloc[:, 2].tolist()

    best = pd.DataFrame({
        'Team': teamList,
        'Wins': df_won,
        'Losts': df_lost,
        'Draw': df_draw
    })

    best.to_csv("best.csv", index=False)

    # Long format: one row per (Team, Result) so each team becomes a facet
    # column holding one bar per result.
    chart = alt.Chart(
        pd.melt(best, id_vars=['Team'], var_name='Result', value_name='Total'),
        height=400,
        width=165).mark_bar().encode(
            alt.X('Result:N', axis=alt.Axis(title="", labels=False)),
            alt.Y('Total:Q', axis=alt.Axis(title='Total', grid=False)),
            alt.Tooltip(["Total:Q"]),
            color=alt.Color('Result:N'),
            column=alt.Column(
                'Team:O',
                sort=alt.EncodingSortField("Total",
                                           op='max',
                                           order='descending'),
                title="")).configure_view(stroke='transparent').interactive()

    return chart.to_json()
Пример #27
0
def bar_from_series(series, column_name, index_name):
    """Build a bar chart from a pandas Series without modifying the Series.

    Parameters
    ----------
    series : pandas.Series
        Values to plot; its index supplies the bar labels.
    column_name : str
        Column name given to the values; used as the x encoding.
    index_name : str
        Column name given to the former index; used as the y encoding.

    Returns
    -------
    alt.Chart
        Bar chart with ``column_name`` on x and ``index_name`` on y, the y
        axis sorted through an ``EncodingSortField`` on ``column_name``.
    """
    # to_frame(name=...) and rename_axis() return new objects, so the caller's
    # Series keeps its own name and index name.
    df = (series.to_frame(name=column_name)
          .rename_axis(index_name)
          .reset_index())
    y = alt.Y(index_name,
              sort=alt.EncodingSortField(field=column_name, op="values"))
    return alt.Chart(df).mark_bar().encode(x=column_name, y=y)
def phrase_bar_chart(data):
    """Return a 700px-wide bar chart of ``count`` per ``phrase``.

    The ``phrase`` axis is sorted by descending ``count`` and its labels are
    drawn with ``labelAngle=0``.
    """
    by_count = alt.EncodingSortField('count', order="descending")
    phrase_axis = alt.Y('phrase', sort=by_count, axis=alt.Axis(labelAngle=0))
    bars = alt.Chart(data).mark_bar().encode(x='count', y=phrase_axis)
    return bars.properties(width=700)
Пример #29
0
def plot_heatmap(file_name, N=5, width=None):
    """Return a heatmap of ``activation`` per ``hidden_unit`` and ``inst_id``.

    The data is the concatenation of ``sample_and_melt(file_name, cell_id, N)``
    for the cell ids PC3, MCF7 and VCAP. ``N`` is only forwarded to
    ``sample_and_melt`` (presumably a per-cell sample size -- confirm there).

    NOTE(review): ``width`` is accepted but never used by this function.
    """
    cell_ids = ["PC3", "MCF7", "VCAP"]
    data = pd.concat(
        [sample_and_melt(file_name, cell_id, N) for cell_id in cell_ids])

    # Hidden units are listed in the explicit order 0..15.
    unit_axis = alt.X("hidden_unit:N", sort=list(range(16)))
    # Rows are ordered through the cell_id field.
    instance_axis = alt.Y("inst_id:O",
                          sort=alt.EncodingSortField(field="cell_id"))
    shade = alt.Color("activation:Q", scale=alt.Scale(scheme="inferno"))

    return alt.Chart(data).mark_rect().encode(x=unit_axis,
                                              y=instance_axis,
                                              color=shade)
Пример #30
0
def make_gdp_perc_chart(year=2018, stat_type='Export'):
    '''
    Build a bar chart of arms imports or exports as a percentage of GDP for
    one year, limited to the 15 rows with the highest percentage.

    Parameters
    -----------
    year: integer [1988, 2018]
        the year for which data is to be displayed - controlled by slider, default is 2018

    stat_type: string one of 'Import' or 'Export'
        matched against the 'Direction' column to choose imports or exports,
        default is 'Export', and controlled by switch

    Returns
    -----------
    gdp_perc_chart: chart
        bar chart showing stat_type as a percentage of GDP for the specified year

    Example
    -----------
    > make_gdp_perc_chart(2017, 'Import')
    '''
    # Wrangling specific to this chart: rows of the requested year and
    # direction, highest percent_GDP first, top 15 only.
    wanted = (arms_gdp['Year'] == year) & (arms_gdp['Direction'] == stat_type)
    top_rows = arms_gdp[wanted].sort_values(by='percent_GDP',
                                            ascending=False).head(15)

    chart_title = ("Arms Trade as a Percentage of GDP for Major " + stat_type +
                   ("ers in %d" % year))

    # Encoding channels, countries ordered by descending percent_GDP.
    country_axis = alt.X('Country:N',
                         sort=alt.EncodingSortField(field='percent_GDP',
                                                    order='descending'),
                         title='Country',
                         axis=alt.Axis(labelAngle=45))
    percent_axis = alt.Y('percent_GDP:Q', title='Arms Trade as a % of GDP')
    shade = alt.Color('percent_GDP:Q',
                      scale=alt.Scale(scheme='goldorange'),
                      legend=None)
    draw_order = alt.Order(shorthand=['percent_GDP'], sort='descending')
    hover = alt.Tooltip(['Country', 'percent_GDP'])

    # Make the chart:
    gdp_perc_chart = alt.Chart(top_rows).mark_bar().encode(
        country_axis, percent_axis, shade, draw_order, hover)
    gdp_perc_chart = gdp_perc_chart.configure_bar(color='orange')
    gdp_perc_chart = gdp_perc_chart.properties(width=920,
                                               height=230,
                                               background='white',
                                               title=chart_title)
    return gdp_perc_chart