Exemplo n.º 1
0
def make_figure(x_selected, color_selected):
    """Build the scatter plot and the two averaged histograms for a selection.

    Args:
        x_selected: Column of the module-level ``df`` used as the scatter's
            x axis and averaged in the third figure.
        color_selected: Column of ``df`` used for colouring and as the x axis
            of both histograms.

    Returns:
        A ``(scatter, avg_srr, avg_x)`` tuple of Plotly figures.
    """
    # Arguments shared by the two averaged histograms.
    shared_hist_args = dict(
        x=color_selected,
        histfunc="avg",
        color=color_selected,
        hover_name="FacilityInfo",
    )

    scatter = px.scatter(
        df,
        x=x_selected,
        y="SRR",
        trendline="ols",
        hover_name="FacilityInfo",
        opacity=1,
        color=color_selected,
    )

    # Average SRR per category.
    avg_srr = px.histogram(df, y="SRR", **shared_hist_args)
    avg_srr.update_xaxes(categoryorder="mean ascending")

    # Average of the selected x column per category.
    avg_x = px.histogram(df, y=x_selected, **shared_hist_args)
    avg_x.update_xaxes(categoryorder="mean ascending")

    return scatter, avg_srr, avg_x
Exemplo n.º 2
0
def plot_trades_length_overview(df_trades, x='trade_len', y='gross_returns'):
    '''Show three Plotly figures describing the trades along column ``x``.

    1) histogram with the count of ``x``, coloured by ``labels``, with a
       dash-dotted vertical line at the mean of ``x``
    2) histogram of ``x`` against the average of ``y``
    3) scatter of the individual trades (``x`` vs ``y``) with the per-``x``
       average of ``y`` drawn on top

    Args:
        df_trades: Trades DataFrame from the strategy PnL. It must contain a
            ``labels`` column plus the columns named by ``x`` and ``y``.
        x: Numeric column shown on the horizontal axis (default
            ``"trade_len"``).
        y: Column that is averaged and plotted (default ``"gross_returns"``).

    Returns:
        None. Each figure is displayed with ``.show()``.
    '''
    # Every statistic is derived from the requested columns, so the overlays
    # stay aligned with the plotted axes when x / y are not the defaults.
    x_values = df_trades[x]
    bin_count = int(x_values.max())
    x_mean = x_values.mean()

    # 1) Distribution of x with a vertical marker at its mean.
    hist_trade_length = px.histogram(df_trades, x=x, color='labels', title=f'<b>{x}</b>')
    hist_trade_length.add_shape(
        type="line", yref='paper',
        x0=x_mean, y0=0, x1=x_mean, y1=1,
        line=dict(color="rgba(0, 0, 0, 0.5)", width=2, dash="dashdot"),
    )
    hist_trade_length.show()

    # 2) Average y for each bin of x.
    hist_ret_len = px.histogram(df_trades, x=x, y=y, histfunc='avg', color='labels',
                                nbins=bin_count, title=f'<b>{y} by {x}</b>')
    hist_ret_len.update_layout(yaxis=dict(tickformat=',.3%'))
    hist_ret_len.show()

    # 3) Individual trades, with the mean of y per distinct x value on top.
    avg_y_by_x = df_trades.groupby(x)[y].mean()
    ret_len_scatter = px.scatter(df_trades, x=x, y=y,
                                 color=df_trades['labels'].astype('str'),
                                 opacity=0.3, title=f'<b>{y} single trades</b>')
    ret_len_scatter.add_trace(go.Scatter(x=avg_y_by_x.index, y=avg_y_by_x.values,
                                         mode='lines', name='Average'))
    ret_len_scatter.update_layout(yaxis=dict(tickformat=',.3%'))
    ret_len_scatter.show()
Exemplo n.º 3
0
def get_graphes(country):
    """Build the three album charts for ``country`` from MongoDB.

    Args:
        country: Value matched against the ``country`` field of the
            ``baby_yoda.top_albums`` collection.

    Returns:
        ``(fig_sell_price, fig_genre_number, fig_year, albums)``. The first
        three are Plotly figures; ``albums`` is the list of matching documents
        sorted by ``certif_UT`` in descending order.
    """
    background = {
        'plot_bgcolor': 'rgb(235, 236, 240)',
        'paper_bgcolor': 'rgb(235, 236, 240)',
    }
    country_filter = {"country": country}

    # Materialise every cursor inside the ``with`` block so the client (and
    # its connections) is closed before the figures are built.
    with MongoClient() as client:
        collection = client.baby_yoda['top_albums']
        df_France = pd.DataFrame(
            list(collection.find(country_filter, {"price": 1, "certif_UT": 1})))
        df_genre = pd.DataFrame(
            list(collection.find(country_filter, {"genre": 1})))
        df_year = pd.DataFrame(
            list(collection.find(country_filter, {"year": 1})))
        albums = list(collection.find(country_filter).sort("certif_UT", -1))

    # figure 1 : Price according to sells in the selected country
    fig_sell_price = px.scatter(
        df_France, x="certif_UT", y="price",
        color_discrete_sequence=['cadetblue'],
        title="Price according to sells in " + country,
        hover_name="certif_UT", hover_data=['price'],
        labels={"certif_UT": "Selling number", "price": "Prices"})
    fig_sell_price.update_traces(mode="markers", hovertemplate=None)
    fig_sell_price.update_layout(background, hovermode="x unified")

    # figure 2 : Distribution of genres in the selected country
    # Name the columns explicitly: the default column names produced by
    # value_counts().reset_index() differ between pandas versions.
    nbr_genre = (df_genre.explode('genre')['genre']
                 .value_counts()
                 .rename_axis('genre')
                 .reset_index(name='number'))
    fig_genre_number = px.histogram(
        nbr_genre, x="genre", y="number",
        color_discrete_sequence=['cadetblue'],
        title="Distribution of genres in " + country,
        labels={"number": "selling number", "genre": "Genres"})
    fig_genre_number.update_layout(background)

    # figure 3 : Distribution of top albums by year in the selected country
    fig_year = px.histogram(
        df_year, x="year",
        color_discrete_sequence=['cadetblue'],
        title="Distribution of top albums by year in " + country,
        labels={"year": "Years", "count": "Number of top albums"})
    fig_year.update_layout(background)

    return fig_sell_price, fig_genre_number, fig_year, albums






    
 def update_histo_2(criteria_value):
     """Return the offence-count histogram for the chosen criterion.

     When ``criteria_value`` is the string ``'None'`` the ``total`` column of
     the module-level ``CSV`` is plotted; otherwise the column named by
     ``criteria_value`` is plotted and appended to the title.
     """
     if criteria_value == 'None':
         column, titre = 'total', 'Number of offence'
     else:
         column = str(criteria_value)
         titre = 'Number of offence' + ' per {}'.format(column)

     histogram = px.histogram(CSV, x=column, color=column, title=titre)
     histogram.update_layout(
         legend_orientation="h",
         legend=dict(x=-0.01, y=1.1),
         showlegend=False,
     )
     return histogram
Exemplo n.º 5
0
def generate_bolge_histogram(start_date, end_date, selector, segment,
                             kategori):
    """Build the per-region (``Bolge``) histogram for the selected period.

    Args:
        start_date: Start of the slice taken on the ``Sevk_Tarihi`` index.
        end_date: End of that slice.
        selector: ``"siparis"`` counts ``Siparis_Miktari``, ``"satis"`` sums
            ``Satis``; any other value sums ``Kar``.
        segment: Values of ``Segment`` to keep.
        kategori: Values of ``Kategori`` to keep.

    Returns:
        The Plotly histogram figure.
    """
    # Rows are ordered by order date, but the date window is applied to the
    # Sevk_Tarihi index.
    by_ship_date = retailer_df.sort_values("Siparis_Tarihi").set_index(
        "Sevk_Tarihi")
    in_period = by_ship_date[start_date:end_date]
    keep = (in_period["Segment"].isin(segment)
            & in_period["Kategori"].isin(kategori))
    filtered_df = in_period[keep]

    # selector -> (y column, y-axis title, aggregation function)
    metric_by_selector = {
        "siparis": ("Siparis_Miktari", "Sipariş Miktarı", 'count'),
        "satis": ("Satis", "Satış", 'sum'),
    }
    y_val, yaxis_title, hist_func = metric_by_selector.get(
        selector, ("Kar", "Kâr", 'sum'))

    blues = [
        '#c6dbef',
        '#9ecae1',
        '#6baed6',
        '#4292c6',
        '#2171b5',
        '#08519c',
        '#08306b',
    ]
    figure = px.histogram(
        filtered_df,
        x="Siparis_Tarihi",
        y=y_val,
        color='Bolge',
        color_discrete_sequence=blues,
        height=400,
        histfunc=hist_func,
    )

    layout = dict(
        autosize=True,
        margin=dict(l=40, r=10, b=10, t=20),
        hovermode="closest",
        xaxis_title_text='Sipariş Dönemi',
        yaxis_title_text=yaxis_title,
        bargap=0.2,  # gap between bars of adjacent location coordinates
        bargroupgap=0.1,  # gap between bars of the same location coordinates
    )
    figure.update_layout(**layout)

    return figure
 def update_histo_1(criteria_value):
     """Return the age histogram, optionally coloured by a criterion.

     The string ``'None'`` disables colouring; any other value is converted
     to ``str`` and used as the colour column of the module-level ``CSV``.
     """
     color_column = None if criteria_value == 'None' else str(criteria_value)
     histogram = px.histogram(
         CSV,
         x="age",
         color=color_column,
         title='Age repartition',
         nbins=20,
     )
     histogram.update_layout(
         legend_orientation="h",
         legend=dict(x=-0.01, y=1.115),
     )
     return histogram
Exemplo n.º 7
0
def update_histogram(input_value):
    """
    Return the panel-brand histogram, optionally restricted to one year.

    Args:
        input_value: installation year to keep, or None to use every row
    """
    if input_value is None:
        data = france
    else:
        # Keep only the rows installed in the requested year.
        data = france[france.an_installation == input_value]

    histogram = px.histogram(data, x='panneaux_marque', y='nb_panneaux')
    histogram.layout.xaxis.title = "marque du panneau"
    histogram.layout.yaxis.title = "nombre de panneaux installés"
    histogram.update_xaxes(categoryorder="total descending")
    return histogram
Exemplo n.º 8
0
    def update_figure(year_slider, month_slider, day_slider, media_dropdown,
                      mfm_dropdown, year_dropdown):
        """Rebuild the four figures for the current control values.

        Args:
            year_slider, month_slider, day_slider: Date used to filter ``df``
                on its ``year`` / ``month`` / ``day`` columns.
            media_dropdown: ``media_type`` shown in the bar chart.
            mfm_dropdown: Suffix of the ``Zone_A_`` / ``Zone_B_`` /
                ``Zone_C_`` columns read from ``df_zone``.
            year_dropdown: Key looked up in those ``df_zone`` columns.

        Returns:
            ``(fig1, fig2, fig3, fig4)``: female-share histogram, male-share
            histogram, per-channel bar chart and the zone choropleth.
        """
        same_day = ((df["year"] == year_slider) & (df["month"] == month_slider)
                    & (df["day"] == day_slider))
        dg = df.loc[same_day]

        fig1 = px.histogram(
            dg,
            x="female_percent",
            color="media_type",
        )

        fig2 = px.histogram(
            dg,
            x="male_percent",
            color="media_type",
        )

        fig3 = px.bar(dg.loc[(dg["media_type"] == media_dropdown)],
                      x="channel_name",
                      y=["female_percent", "male_percent", "music_percent"])

        # Refresh the values shown on the map. Each write goes through one
        # .loc call: the chained form ``sf_zone['value'][i] = ...`` assigns
        # into an intermediate object and is not guaranteed to update
        # sf_zone itself.
        for position, zone in enumerate(("A", "B", "C")):
            sf_zone.loc[sf_zone.index[position], 'value'] = (
                df_zone["Zone_" + zone + "_" + mfm_dropdown][year_dropdown])

        fig4 = px.choropleth(sf_zone,
                             geojson=sf_zone.geometry,
                             locations=sf_zone.index,
                             color='value',
                             scope="europe",
                             title="Proportions par zones de vacances",
                             hover_name="nom")
        fig4.update_geos(fitbounds="locations", visible=False)

        return (fig1, fig2, fig3, fig4)
 def update_histo_avg(avg_value, criteria_value, color_value):
     """Return the histogram of the average of ``avg_value`` per criterion.

     ``criteria_value`` equal to the string ``'None'`` selects the ``total``
     column of the module-level ``CSV`` as x axis; otherwise the named column
     is used and appended to the title. ``color_value`` names the colour
     column.
     """
     measure = str(avg_value)
     # Upper-case only the first character; the rest of the name is untouched.
     titre = '{} average'.format(measure[0].upper() + measure[1:])

     if criteria_value == 'None':
         x_column = 'total'
     else:
         x_column = str(criteria_value)
         titre += ' per {}'.format(x_column)

     histogram = px.histogram(
         CSV,
         x=x_column,
         y=measure,
         color=str(color_value),
         title=titre,
         histfunc='avg',
     )
     histogram.update_layout(legend_orientation="h",
                             legend=dict(x=-0.01, y=1.1))
     return histogram
def visualize():
    """Visualize experiment data per voting scheme.

    Reads ``TARGET_FILE`` and, for every distinct ``voting_scheme``, writes
    two PNG files into ``FIG_DIR``: ``<scheme>_strat_voting.png`` (scatter
    sized and coloured by risk) and ``<scheme>_runtime.png`` (histogram
    coloured by run time). Spaces in the scheme name become underscores.
    """
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(FIG_DIR, exist_ok=True)
    df = pd.read_csv(TARGET_FILE, index_col=0)

    # Layout shared by both images.
    shared_layout = dict(
        xaxis_title="Number of Candidates",
        yaxis_title="Number of Voters",
        font=dict(family="Courier New, monospace", size=22, color="#1f1f1f"),
    )

    for scheme in df["voting_scheme"].unique():
        # rename() returns a new frame, so nothing is modified in place on a
        # slice of df (which triggers SettingWithCopyWarning).
        df_plt = df.loc[df.voting_scheme == scheme, :].rename(columns={
            "strat_voting_risk": "Risk",
            "run_time": "Time"
        })

        # Sanitise only the file name; FIG_DIR itself must stay untouched or
        # the path would point outside the directory created above.
        file_stem = os.path.join(FIG_DIR, str(scheme).replace(" ", "_"))

        # strat_voting image
        fig = px.scatter(df_plt,
                         x="n_candidates",
                         y="n_voters",
                         size="Risk",
                         color="Risk",
                         color_continuous_scale=px.colors.sequential.Viridis,
                         width=800,
                         height=800).update_layout(**shared_layout)
        fig.write_image(file_stem + "_strat_voting.png")

        # run time image
        fig = px.histogram(df_plt,
                           x="n_candidates",
                           y="n_voters",
                           color="Time",
                           color_discrete_sequence=custom_viridis,
                           width=800,
                           height=800).update_layout(**shared_layout)
        # TODO: rename y axis
        fig.write_image(file_stem + "_runtime.png")
Exemplo n.º 11
0
def CreateHistogram(age_data):
    """
    Build one age histogram per entry of the module-level ``years``.

    age_data : object indexed by year; ``age_data[year]`` is the data plotted
    return : dict mapping each year to its Plotly histogram
    """
    figures_by_year = {}
    for year in years:
        figure = px.histogram(
            age_data[year],
            x="age",
            nbins=12,
            facet_col="profession",
            histnorm="probability",
            labels={"profession=": ""},
            color_discrete_sequence=['indianred'],
        )
        figure.layout.yaxis.title = "probability"
        figures_by_year[year] = figure
    return figures_by_year
Exemplo n.º 12
0
def showPlots(chart_select):
    """Render the sidebar settings and the chart for ``chart_select``.

    Handles ``'Scatterplots'``, ``'Lineplots'``, ``'Histogram'`` and
    ``'Boxplot'``; any other value does nothing. Column choices come from the
    module-level ``numeric_columns`` / ``non_numeric_columns`` and the data
    from the module-level ``df``.

    Any exception raised while building a chart is printed to stdout and
    otherwise ignored, so nothing is drawn for that run.

    Args:
        chart_select: Name of the chart type chosen by the user.
    """
    if chart_select == 'Scatterplots':
        st.sidebar.subheader("Scatterplot Settings")
        try:
            x_values = st.sidebar.selectbox('X axis', options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis', options=numeric_columns)
            color_value = st.sidebar.selectbox("Color", options=non_numeric_columns)
            plot = px.scatter(data_frame=df, x=x_values, y=y_values, color=color_value)
            # display the chart
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Lineplots':
        st.sidebar.subheader("Line Plot Settings")
        try:
            x_values = st.sidebar.selectbox('X axis', options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis', options=numeric_columns)
            color_value = st.sidebar.selectbox("Color", options=non_numeric_columns)
            plot = px.line(data_frame=df, x=x_values, y=y_values, color=color_value)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Histogram':
        st.sidebar.subheader("Histogram Settings")
        try:
            x = st.sidebar.selectbox('Feature', options=numeric_columns)
            bin_size = st.sidebar.slider("Number of Bins", min_value=10,
                                         max_value=100, value=40)
            color_value = st.sidebar.selectbox("Color", options=non_numeric_columns)
            # Pass the slider value on; without nbins the control has no effect.
            plot = px.histogram(x=x, data_frame=df, color=color_value,
                                nbins=bin_size)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Boxplot':
        st.sidebar.subheader("Boxplot Settings")
        try:
            y = st.sidebar.selectbox("Y axis", options=numeric_columns)
            x = st.sidebar.selectbox("X axis", options=non_numeric_columns)
            color_value = st.sidebar.selectbox("Color", options=non_numeric_columns)
            plot = px.box(data_frame=df, y=y, x=x, color=color_value)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)
Exemplo n.º 13
0
def statewise_trend(view):
    """Return the horizontal per-state histogram for the requested metric.

    Args:
        view: One of ``'Confirmed'``, ``'Recovered'``, ``'Deaths'`` or
            ``'Active'``; anything else falls back to ``'Confirmed'``.

    Returns:
        The Plotly figure summing the chosen column of the module-level
        ``sc`` for each ``State``.
    """
    known_views = ('Confirmed', 'Recovered', 'Deaths', 'Active')
    val = view if view in known_views else 'Confirmed'

    fig1 = px.histogram(sc,
                        x=val,
                        y="State",
                        histfunc="sum",
                        color="State",
                        hover_name="State",
                        orientation="h",
                        template="seaborn")

    x_axis = {
        'title': 'Cases Count',
        'fixedrange': True,
        'gridcolor': colors['grid'],
    }
    y_axis = {
        'title': '',
        'fixedrange': True,
        'autorange': True,
        'gridcolor': colors['grid'],
        'showgrid': True,
    }
    fig1.update_layout(
        xaxis=x_axis,
        yaxis=y_axis,
        hovermode='closest',
        font=dict(color=colors['text']),
        paper_bgcolor=colors['background'],
        plot_bgcolor=colors['background'],
        showlegend=False,
        height=700,
    )
    return fig1
Exemplo n.º 14
0
def update_histo(crime_dropdown):
    """Return the crimes-per-year histogram for the selected crimes.

    Nothing selected gives the histogram of the 'empty' dataframe, a single
    selection gives the histogram of that crime, and several selections give
    the histogram of their row-wise sum.

    Args:
        crime_dropdown: Selected crime column names; ``None`` is treated the
            same as an empty selection.

    Returns:
        The Plotly histogram figure, overlaid per ``service``.
    """
    if not crime_dropdown:
        df_histo = get_dataframe('empty')
        crime = 'vide'
        y_label = {'année': 'Année', 'vide': 'nothing'}

    elif len(crime_dropdown) == 1:
        crime = crime_dropdown[0]
        df_histo = get_dataframe()[['année', 'service', crime]]
        y_label = {'année': 'Année'}

    else:
        # Load the data once and build the summed column with assign(), which
        # returns a new frame instead of writing into a slice.
        source = get_dataframe()
        df_histo = source[['année', 'service']].assign(
            crimes=source[crime_dropdown].sum(axis=1))
        crime = 'crimes'
        y_label = {'année': 'Année'}

    histo = px.histogram(df_histo,
                         x='année',
                         y=crime,
                         labels=y_label,
                         barmode='overlay',
                         color='service',
                         opacity=0.5,
                         color_discrete_sequence=[colors['GN'], colors['PN']])

    histo.update_layout(
        title=dict(
            text="Nombre de crimes et délits commis par année",
            font={"size": 30},
            y=0.97,
            x=0.5,
            xanchor='center',
            yanchor='top'),
        paper_bgcolor=colors['background'],
        font_color=colors['text'])

    return histo
Exemplo n.º 15
0
def update_figure1(graphic_type, x_axis, y_axis):
    """
    Return a chart of the module-level ``france`` data with the chosen axes.

    Args:
        graphic_type: chart type, 'Nuage' (scatter plot) or 'Histogramme'
        x_axis: column used for the x axis
        y_axis: column used for the y axis

    Returns:
        The Plotly figure, or the string 'type error' (also printed) when
        ``graphic_type`` is neither of the two supported values.
    """
    if graphic_type == 'Nuage':
        build_chart = px.scatter
    elif graphic_type == 'Histogramme':
        build_chart = px.histogram
    else:
        print('type error')
        return 'type error'

    chart = build_chart(france, x=x_axis, y=y_axis)
    # Axis titles use the display names from Nom_colonnes.
    chart.layout.xaxis.title = Nom_colonnes[x_axis]
    chart.layout.yaxis.title = Nom_colonnes[y_axis]
    return chart
Exemplo n.º 16
0
def generate_islem_tipi_histogram(start_date, end_date, dataframe):
    """Build the histogram of summed ``Tutar`` per ``Tarih``, split by ``Islem Tipi``.

    Args:
        start_date: Start of the slice taken on the ``timestamp`` index.
        end_date: End of that slice.
        dataframe: Source data; the code reads its ``timestamp``, ``Tarih``,
            ``Tutar`` and ``Islem Tipi`` columns.

    Returns:
        The Plotly histogram figure.
    """
    by_time = dataframe.sort_values("timestamp").set_index("timestamp")
    filtered_df = by_time[start_date:end_date]

    # Colour values of matplotlib's Tableau palette, in definition order.
    mycolors = np.array(list(mcolors.TABLEAU_COLORS.values()))

    figure = px.histogram(
        filtered_df,
        x="Tarih",
        y='Tutar',
        color='Islem Tipi',
        color_discrete_sequence=mycolors,
        histfunc='sum',
    )

    layout = dict(
        autosize=True,
        margin=dict(l=40, r=10, b=10, t=20),
        hovermode="closest",
        xaxis_title_text='Dönem',
        yaxis_title_text='Tutar',
        bargap=0.2,  # gap between bars of adjacent location coordinates
        bargroupgap=0.1,  # gap between bars of the same location coordinates
    )
    figure.update_layout(**layout)

    return figure
Exemplo n.º 17
0
                              color=color_value)

        else:
            plot = px.scatter(data_frame=df, x=x_value, y=y_value)

        # display chart in streamlit
        st.plotly_chart(plot)

    if chart_select == 'Histogram':
        st.sidebar.subheader("Settings for Histogram")
        x = st.sidebar.selectbox(label="Feature", options=numeric_columns)
        bin_size = st.sidebar.slider(label="Number of bins",
                                     min_value=10,
                                     max_value=100,
                                     value=50)
        plot = px.histogram(data_frame=df, x=x, nbins=bin_size)
        st.plotly_chart(plot)

    if chart_select == 'Lineplots':
        st.sidebar.subheader("Settings for Line plots.")
        x_value = st.sidebar.selectbox(label='X axis', options=numeric_columns)
        y_value = st.sidebar.selectbox(label='Y axis', options=numeric_columns)

        plot = px.line(data_frame=df, x=x_value, y=y_value)

        # display the chart
        st.plotly_chart(plot)

except Exception as e:
    print(e)
Exemplo n.º 18
0
                                 'dep']], 2019, {"2019": "Nombre d'habitants"},
                   "Répartition de la population française", "blues")

# Initial crime-density map, built from df_empty and its 'vide' column.
# NOTE(review): mini/maxi presumably bound the colour scale (0 to 10) —
# confirm against create_map.
map_den = create_map(df_empty,
                     'vide', {'vide': 'Nb crimes pour 1000 hbs'},
                     "Densité des crimes et délits en France",
                     "purples",
                     mini=0,
                     maxi=10)

# Initial histogram: the crime at list_crimes[10] per year, with one overlaid,
# semi-transparent series per 'service'.
# NOTE(review): Plotly Express labels are keyed by column name, so the 'y'
# key below may have no effect — confirm.
hist = px.histogram(df[['année', 'service', list_crimes[10]]],
                    x='année',
                    y=list_crimes[10],
                    labels={
                        'année': 'Années',
                        'y': 'Somme des ' + list_crimes[10]
                    },
                    color='service',
                    barmode='overlay',
                    opacity=0.5,
                    color_discrete_sequence=[colors['GN'], colors['PN']])

# Centred title, plus background and font colours from the shared palette.
hist.update_layout(title=dict(
    text="Nombre de crimes et délits commis par année",
    font={"size": 30},
    y=0.9,
    x=0.5,
    xanchor='center',
    yanchor='top'),
                   paper_bgcolor=colors['background'],
                   font_color=colors['text'])
Exemplo n.º 19
0
                         color=color_value,
                         barmode=mode)
            st.plotly_chart(fig)
        elif plot_type == "line":
            fig = px.line(dfp,
                          x='Tahun',
                          y=selected_columns,
                          color=color_value)
            st.plotly_chart(fig)
        elif plot_type == "area":
            fig = px.area(dfp,
                          x='Tahun',
                          y=selected_columns,
                          color=color_value)
        elif plot_type == "hist":
            fig = px.histogram(dfp, x=selected_columns)
            st.plotly_chart(fig)
        elif plot_type == "box":
            fig = px.box(dfp, y=selected_columns, x=xbox)
            st.plotly_chart(fig)
        elif plot_type:
            cust_plot = dfp[selected_columns].plot(kind=plot_type)
            st.write(cust_plot)
            st.pyplot()

    if st.checkbox("Budget Allocation vs Index Change Analysis"):
        # df = pd.read_csv('belanja_apbd.csv',sep=";")
        df = pd.read_excel('belanja_apbd_full.xlsx')
        df.loc[:, 'Total_anggaran'] = df.sum(numeric_only=True, axis=1)
        st.subheader('Anomali Detection from Trends and Outliers Analysis ')
        index = st.selectbox('Choose Index',
Exemplo n.º 20
0
def main():
    activities = [
        'EDA', 'Visualization', 'Regression', 'Classification',
        'Documentation', 'About Us'
    ]
    st.sidebar.success('Updates Coming Soon! 🌟🎉')
    option = st.sidebar.selectbox('Choose a section', activities)
    st.sidebar.markdown(
        '''Use this section for finding useful insights about your data,and feel free to use them in your notebooks
                                             
    🎯   Version : 1.0.1  ''')

    if option == 'EDA':
        st.subheader("Explanatory Data Analysis")

        data = st.file_uploader("Please upload a CSV dataset ", type=['csv'])

        st.warning('Your dataset goes here...')
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df)
            st.info('Some useful data insights about your data')
            if st.checkbox("Display shape"):
                r, c = df.shape
                st.write('Rows = ', r, 'Columns = ', c)

            if st.checkbox('Display columns'):
                st.write(df.columns)

            if st.checkbox('Select multiple columns'):
                selected_col = st.multiselect('Select preferred columns',
                                              df.columns)
                df1 = df[selected_col]
                st.dataframe(df1)

            if st.checkbox("Head"):
                st.write(df.head())

            if st.checkbox('Tail'):
                st.write(df.tail())

            if st.checkbox('Null values'):
                st.write(df.isnull().sum())

            if st.checkbox('Data types'):
                st.write(df.dtypes)

            if st.checkbox('Random sample'):
                st.write(df.sample(20))

            if st.checkbox('Display correlations'):
                st.write(df.corr())

            if st.checkbox('Summary'):
                st.write(df.describe(include='all').T)

    elif option == 'Visualization':
        st.subheader("Data Visualization and Graphing")

        st.sidebar.subheader("File Upload")

        # Setup file upload
        uploaded_file = st.sidebar.file_uploader(
            label="Upload your CSV file. (200MB max)", type=['csv'])

        if uploaded_file is not None:
            st.success('Your data goes here')

        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.warning('Data not found')

        global numeric_columns
        global non_numeric_columns
        try:
            st.write(df)
            numeric_columns = list(df.select_dtypes(['float', 'int']).columns)
            non_numeric_columns = list(df.select_dtypes(['object']).columns)
            non_numeric_columns.append(None)
            print(non_numeric_columns)
        except Exception as e:
            print(e)

        chart_select = st.sidebar.selectbox(label="Select the chart type",
                                            options=[
                                                'Scatterplots', 'Lineplots',
                                                'Histogram', 'Boxplot',
                                                'Violinplot', 'Piechart'
                                            ])

        st.info('The Graphs generated will be displayed here')

        if chart_select == 'Scatterplots':
            st.sidebar.subheader("Scatterplot Settings")
            try:
                x_values = st.sidebar.selectbox('X axis',
                                                options=numeric_columns)
                y_values = st.sidebar.selectbox('Y axis',
                                                options=numeric_columns)
                color_value = st.sidebar.selectbox("Color",
                                                   options=non_numeric_columns)
                plot = px.scatter(data_frame=df,
                                  x=x_values,
                                  y=y_values,
                                  color=color_value)
                # display the chart
                st.plotly_chart(plot)
            except Exception as e:
                print(e)

        if chart_select == 'Lineplots':
            st.sidebar.subheader("Line Plot Settings")
            try:
                x_values = st.sidebar.selectbox('X axis',
                                                options=numeric_columns)
                y_values = st.sidebar.selectbox('Y axis',
                                                options=numeric_columns)
                color_value = st.sidebar.selectbox("Color",
                                                   options=non_numeric_columns)
                plot = px.line(data_frame=df,
                               x=x_values,
                               y=y_values,
                               color=color_value)
                st.plotly_chart(plot)
            except Exception as e:
                print(e)

        if chart_select == 'Histogram':
            st.sidebar.subheader("Histogram Settings")
            try:
                x = st.sidebar.selectbox('Feature', options=numeric_columns)
                bin_size = st.sidebar.slider("Number of Bins",
                                             min_value=10,
                                             max_value=100,
                                             value=40)
                color_value = st.sidebar.selectbox("Color",
                                                   options=non_numeric_columns)
                plot = px.histogram(x=x, data_frame=df, color=color_value)
                st.plotly_chart(plot)
            except Exception as e:
                print(e)

        if chart_select == 'Boxplot':
            st.sidebar.subheader("Boxplot Settings")
            try:
                y = st.sidebar.selectbox("Y axis", options=numeric_columns)
                x = st.sidebar.selectbox("X axis", options=non_numeric_columns)
                color_value = st.sidebar.selectbox("Color",
                                                   options=non_numeric_columns)
                plot = px.box(data_frame=df, y=y, x=x, color=color_value)
                st.plotly_chart(plot)
            except Exception as e:
                print(e)

        if chart_select == 'Piechart':
            st.sidebar.subheader("Piechart Settings")
            try:
                x_values = st.sidebar.selectbox('X axis',
                                                options=numeric_columns)
                y_values = st.sidebar.selectbox('Y axis',
                                                options=non_numeric_columns)
                plot = px.pie(data_frame=df, values=x_values, names=y_values)
                st.plotly_chart(plot)

            except Exception as e:
                print(e)

        if chart_select == 'Violinplot':
            st.sidebar.subheader("Violin Plot Settings")
            try:
                x_values = st.sidebar.selectbox('X axis',
                                                options=numeric_columns)
                y_values = st.sidebar.selectbox('Y axis',
                                                options=numeric_columns)
                color_value = st.sidebar.selectbox("Color",
                                                   options=non_numeric_columns)
                plot = px.violin(data_frame=df,
                                 x=x_values,
                                 y=y_values,
                                 color=color_value)
                st.plotly_chart(plot)
            except Exception as e:
                print(e)

    elif option == 'Regression':
        st.subheader("Regression ML Model Builder")

        # Model building
        def build_model(df):
            """Benchmark the LazyRegressor models on ``df`` and render the results.

            The last column of ``df`` is used as the target and every other
            column as a feature.  The data is split with ``train_test_split``
            using ``split_size`` and ``seed_number``, which are read from the
            enclosing scope.  The function writes a dataset summary, the
            train/test score tables (with CSV download links) and bar charts
            of R-squared, RMSE and time taken to the Streamlit page.  It
            returns nothing.
            """
            l = len(df)  # row count; reused below as a display limit and as the RMSE cap

            X = df.iloc[:, :
                        -1]  # Using all column except for the last column as X
            Y = df.iloc[:, -1]  # Selecting the last column as Y

            st.markdown('**1.2. Dataset dimension**')
            st.write('X (Independent Axis)')
            st.info(X.shape)
            st.write('Y (Dependent Axis)')
            st.info(Y.shape)

            st.markdown('**1.3. Variable details**:')
            st.write('X variable (first few are shown)')
            # Lists at most len(df) / 5 feature names (the slice is bounded by
            # the number of columns actually present).
            st.info(list(X.columns[:int(l / 5)]))
            st.write('Y variable')
            st.info(Y.name)

            # Build lazy model
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=split_size, random_state=seed_number)
            reg = LazyRegressor(verbose=0,
                                ignore_warnings=False,
                                custom_metric=None)
            # fit() is called twice: first with the training data in both the
            # "train" and "test" positions, then with the held-out split.
            # NOTE(review): presumably fit(X_train, X_test, y_train, y_test)
            # scores on the second/fourth arguments - confirm against lazypredict.
            models_train, predictions_train = reg.fit(X_train, X_train,
                                                      Y_train, Y_train)
            models_test, predictions_test = reg.fit(X_train, X_test, Y_train,
                                                    Y_test)

            st.subheader('2.Model Performance Plot (Training Set)')

            st.write('Training set')
            st.write(predictions_train)
            st.markdown(filedownload(predictions_train, 'training.csv'),
                        unsafe_allow_html=True)

            st.write('Test set')
            st.write(predictions_test)
            st.markdown(filedownload(predictions_test, 'test.csv'),
                        unsafe_allow_html=True)

            st.subheader('3.Model Performance Plot(Test set)')

            # The value returned by st.markdown() is used as a context manager
            # for the "tall" figure of each metric.
            with st.markdown('**R-squared**'):
                # Tall: negative R-squared values are clamped to 0 so the bars
                # fit the fixed 0..1 axis.
                predictions_test["R-Squared"] = [
                    0 if i < 0 else i for i in predictions_test["R-Squared"]
                ]
                plt.figure(figsize=(3, 9))
                sns.set_theme(style="darkgrid")
                ax1 = sns.barplot(y=predictions_test.index,
                                  x="R-Squared",
                                  data=predictions_test)
                ax1.set(xlim=(0, 1))
            # imagedownload() saves and closes the current figure, so the tall
            # variant is only offered as a download and never passed to st.pyplot.
            st.markdown(imagedownload(plt, 'plot-r2-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide: same data with swapped axes; displayed and offered for download.
            plt.figure(figsize=(12, 3))
            sns.set_theme(style="darkgrid")
            ax1 = sns.barplot(x=predictions_test.index,
                              y="R-Squared",
                              data=predictions_test)
            ax1.set(ylim=(0, 1))
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-r2-wide.pdf'),
                        unsafe_allow_html=True)

            with st.markdown('**RMSE (capped at l/2)**'):
                # Tall: RMSE values above half the row count are clipped to it.
                predictions_test["RMSE"] = [(l / 2) if i > (l / 2) else i
                                            for i in predictions_test["RMSE"]]
                plt.figure(figsize=(3, 9))
                sns.set_theme(style="darkgrid")
                ax2 = sns.barplot(y=predictions_test.index,
                                  x="RMSE",
                                  data=predictions_test)
            st.markdown(imagedownload(plt, 'plot-rmse-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide
            plt.figure(figsize=(12, 3))
            sns.set_theme(style="darkgrid")
            ax2 = sns.barplot(x=predictions_test.index,
                              y="RMSE",
                              data=predictions_test)
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-rmse-wide.pdf'),
                        unsafe_allow_html=True)

            with st.markdown('**Calculation time**'):
                # Tall: negative timings are clamped to 0.
                predictions_test["Time Taken"] = [
                    0 if i < 0 else i for i in predictions_test["Time Taken"]
                ]
                plt.figure(figsize=(3, 9))
                sns.set_theme(style="darkgrid")
                ax3 = sns.barplot(y=predictions_test.index,
                                  x="Time Taken",
                                  data=predictions_test)
            st.markdown(imagedownload(plt, 'plot-calculation-time-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide
            plt.figure(figsize=(9, 3))
            sns.set_theme(style="darkgrid")
            ax3 = sns.barplot(x=predictions_test.index,
                              y="Time Taken",
                              data=predictions_test)
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-calculation-time-wide.pdf'),
                        unsafe_allow_html=True)

        def filedownload(df, filename):
            """Return an HTML ``<a>`` tag that downloads ``df`` as a CSV file.

            The frame is serialised with ``to_csv(index=False)`` and embedded
            in the link as a base64 ``data:`` URI, so no file is written.

            Args:
                df: object exposing ``to_csv`` (a pandas DataFrame).
                filename: name suggested to the browser for the download.

            Returns:
                The anchor markup as a string.
            """
            from html import escape

            # NOTE(review): index=False drops the index from the export;
            # callers whose tables keep their labels in the index lose them.
            csv_text = df.to_csv(index=False)
            b64 = base64.b64encode(csv_text.encode()).decode()
            # The attribute value is quoted and escaped so that names with
            # spaces or quotes still yield valid markup; "text/csv" is the
            # registered media type for CSV.
            safe_name = escape(filename)
            return (f'<a href="data:text/csv;base64,{b64}" '
                    f'download="{safe_name}">Download {safe_name} File</a>')

        def imagedownload(plt, filename):
            """Return an HTML ``<a>`` tag that downloads the current figure as PDF.

            The figure is rendered through ``plt.savefig`` into memory and
            embedded in the link as a base64 ``data:`` URI.  The figure is
            closed afterwards, even when saving fails.

            Args:
                plt: object exposing ``savefig`` and ``close`` (callers pass
                    the pyplot module).
                filename: name suggested to the browser for the download.

            Returns:
                The anchor markup as a string.
            """
            from html import escape

            buffer = io.BytesIO()
            try:
                plt.savefig(buffer, format='pdf', bbox_inches='tight')
            finally:
                # Always release the figure so later plots start from a clean state.
                plt.close()
            b64 = base64.b64encode(buffer.getvalue()).decode()
            # The payload is a PDF, so it is labelled application/pdf; the
            # download attribute is quoted and escaped to stay valid for any name.
            safe_name = escape(filename)
            return (f'<a href="data:application/pdf;base64,{b64}" '
                    f'download="{safe_name}">Download {safe_name} File</a>')

        # Sidebar: data source.
        with st.sidebar.header('File Uploader Section'):
            uploaded_file = st.sidebar.file_uploader(
                "Upload an input as CSV file", type=["csv"])

        # Sidebar: split / seed controls that build_model() reads from this scope.
        with st.sidebar.header(
                'Set the optimization parameters\n (Grab the slider and set to any suitable point)'
        ):
            # Forwarded to train_test_split(test_size=...); a float test_size
            # has to lie strictly between 0 and 1, so both end points are
            # excluded from the slider range.
            split_size = st.sidebar.slider('Data split ratio (in fraction):',
                                           min_value=0.01,
                                           max_value=0.99,
                                           value=0.7,
                                           step=0.01)
            # Keyword arguments keep min/max/value/step unambiguous; the
            # default must lie inside [min_value, max_value] or Streamlit
            # refuses to build the widget.
            seed_number = st.sidebar.slider('Set the random-seed-value :',
                                            min_value=0,
                                            max_value=100,
                                            value=100,
                                            step=5)

        with st.sidebar.header('Project made by:'):
            st.write("Made by: MAINAK CHAUDHURI")

        #---------------------------------#

        st.subheader('Dataset display')

        if uploaded_file is not None:
            # User-supplied CSV: show it, then benchmark the models on it.
            df = pd.read_csv(uploaded_file)
            st.markdown('**Snap of the dataset**')
            st.write(df)
            build_model(df)
        else:
            st.info('Upload a file')
            st.info('OR')
            if st.button('Use preloaded data instead'):
                # NOTE(review): the label says "Pima diabetes" while the data
                # comes from load_diabetes() - confirm the wording.
                st.info("Dataset used : Pima diabetes")

                diabetes = load_diabetes()

                # .loc slicing is label-inclusive: rows 0..100 are kept.
                X = pd.DataFrame(diabetes.data,
                                 columns=diabetes.feature_names).loc[:100]
                Y = pd.Series(diabetes.target, name='response').loc[:100]
                # Target goes last because build_model() treats the final
                # column as Y.
                df = pd.concat([X, Y], axis=1)

                st.markdown(
                    'Displaying results form a sample preloaded data :')
                st.write(df.head(5))

                build_model(df)

    elif option == 'Classification':
        st.subheader("Classifier ML Model Builder")

        def build_model(df):
            """Benchmark the LazyClassifier models on ``df`` and render the results.

            The last column of ``df`` is used as the target and every other
            column as a feature.  The data is split with ``train_test_split``
            using ``split_size`` and ``seed_number``, which are read from the
            enclosing scope.  The function writes a dataset summary, the
            train/test score tables (with CSV download links) and accuracy
            bar charts to the Streamlit page.  It returns nothing.
            """
            l = len(df)  # row count; bounds how many feature names are listed

            X = df.iloc[:, :-1]  # every column except the last one
            Y = df.iloc[:, -1]  # the last column is the target

            st.markdown('**1.2. Dataset dimension**')
            st.write('X (Independent Axis)')
            st.info(X.shape)
            st.write('Y (Dependent Axis)')
            st.info(Y.shape)

            st.markdown('**1.3. Variable details**:')
            st.write('X variable (first few are shown)')
            st.info(list(X.columns[:int(l / 5)]))
            st.write('Y variable')
            st.info(Y.name)

            # Build lazy model
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=split_size, random_state=seed_number)
            clf = LazyClassifier(verbose=0,
                                 ignore_warnings=False,
                                 custom_metric=None)
            # First call passes the training data in both positions, second
            # call passes the held-out split.
            models_train, predictions_train = clf.fit(X_train, X_train,
                                                      Y_train, Y_train)
            models_test, predictions_test = clf.fit(X_train, X_test, Y_train,
                                                    Y_test)

            st.subheader('2.Model Performance Plot (Training Set)')

            st.write('Training set')
            st.write(predictions_train)
            st.markdown(filedownload(predictions_train, 'training.csv'),
                        unsafe_allow_html=True)

            st.write('Test set')
            st.write(predictions_test)
            st.markdown(filedownload(predictions_test, 'test.csv'),
                        unsafe_allow_html=True)

            st.subheader('3.Model Performance Plot(Test set)')

            with st.markdown('**Accuracy**'):
                # Tall: clamp to 0 so every bar fits the fixed 0..1 axis.
                predictions_test["Accuracy"] = [
                    0 if i < 0 else i for i in predictions_test["Accuracy"]
                ]
                plt.figure(figsize=(5, 12))
                sns.set_theme(style="darkgrid")
                ax1 = sns.barplot(y=predictions_test.index,
                                  x="Accuracy",
                                  data=predictions_test)
                ax1.set(xlim=(0, 1))
            # imagedownload() saves and closes the current figure, so the tall
            # variant is only offered as a download.  The file names now
            # describe the plotted metric (accuracy) instead of R-squared.
            st.markdown(imagedownload(plt, 'plot-accuracy-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide: same data with swapped axes; displayed and offered for download.
            plt.figure(figsize=(12, 5))
            sns.set_theme(style="darkgrid")
            ax1 = sns.barplot(x=predictions_test.index,
                              y="Accuracy",
                              data=predictions_test)
            ax1.set(ylim=(0, 1))
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-accuracy-wide.pdf'),
                        unsafe_allow_html=True)

        def filedownload(df, filename):
            """Return an HTML ``<a>`` tag that downloads ``df`` as a CSV file.

            The frame is serialised with ``to_csv(index=False)`` and embedded
            in the link as a base64 ``data:`` URI, so no file is written.

            Args:
                df: object exposing ``to_csv`` (a pandas DataFrame).
                filename: name suggested to the browser for the download.

            Returns:
                The anchor markup as a string.
            """
            from html import escape

            # NOTE(review): index=False drops the index from the export;
            # callers whose tables keep their labels in the index lose them.
            csv_text = df.to_csv(index=False)
            b64 = base64.b64encode(csv_text.encode()).decode()
            # The attribute value is quoted and escaped so that names with
            # spaces or quotes still yield valid markup; "text/csv" is the
            # registered media type for CSV.
            safe_name = escape(filename)
            return (f'<a href="data:text/csv;base64,{b64}" '
                    f'download="{safe_name}">Download {safe_name} File</a>')

        def imagedownload(plt, filename):
            """Return an HTML ``<a>`` tag that downloads the current figure as PDF.

            The figure is rendered through ``plt.savefig`` into memory and
            embedded in the link as a base64 ``data:`` URI.  The figure is
            closed afterwards, even when saving fails.

            Args:
                plt: object exposing ``savefig`` and ``close`` (callers pass
                    the pyplot module).
                filename: name suggested to the browser for the download.

            Returns:
                The anchor markup as a string.
            """
            from html import escape

            buffer = io.BytesIO()
            try:
                plt.savefig(buffer, format='pdf', bbox_inches='tight')
            finally:
                # Always release the figure so later plots start from a clean state.
                plt.close()
            b64 = base64.b64encode(buffer.getvalue()).decode()
            # The payload is a PDF, so it is labelled application/pdf; the
            # download attribute is quoted and escaped to stay valid for any name.
            safe_name = escape(filename)
            return (f'<a href="data:application/pdf;base64,{b64}" '
                    f'download="{safe_name}">Download {safe_name} File</a>')

        # Sidebar: data source.
        with st.sidebar.header('File Uploader Section'):
            uploaded_file = st.sidebar.file_uploader(
                "Upload an input as CSV file", type=["csv"])

        # Sidebar: split / seed controls that build_model() reads from this scope.
        with st.sidebar.header(
                'Set the optimization parameters\n (Grab the slider and set to any suitable point)'
        ):
            # Forwarded to train_test_split(test_size=...); a float test_size
            # has to lie strictly between 0 and 1, so both end points are
            # excluded from the slider range.
            split_size = st.sidebar.slider('Data split ratio (in fraction):',
                                           min_value=0.01,
                                           max_value=0.99,
                                           value=0.7,
                                           step=0.01)
            # Keyword arguments keep min/max/value/step unambiguous; the
            # default must lie inside [min_value, max_value] or Streamlit
            # refuses to build the widget.
            seed_number = st.sidebar.slider('Set the random-seed-value :',
                                            min_value=0,
                                            max_value=100,
                                            value=100,
                                            step=5)

        with st.sidebar.header('Project made by:'):
            st.write("Made by: MAINAK CHAUDHURI")

        #---------------------------------#

        st.subheader('Dataset display')

        if uploaded_file is not None:
            # User-supplied CSV: show it, then benchmark the models on it.
            df = pd.read_csv(uploaded_file)
            st.markdown('**Snap of the dataset**')
            st.write(df)
            build_model(df)
        else:
            st.info('Upload a file')
            st.info('OR')
            if st.button('Use preloaded data instead'):
                # NOTE(review): the label says "Pima diabetes" while the data
                # comes from load_diabetes(); that dataset's target looks
                # continuous, which may not suit a classifier benchmark -
                # confirm and consider a classification dataset instead.
                st.info("Dataset used : Pima diabetes")

                diabetes = load_diabetes()

                # .loc slicing is label-inclusive: rows 0..100 are kept.
                X = pd.DataFrame(diabetes.data,
                                 columns=diabetes.feature_names).loc[:100]
                Y = pd.Series(diabetes.target, name='response').loc[:100]
                # Target goes last because build_model() treats the final
                # column as Y.
                df = pd.concat([X, Y], axis=1)

                st.markdown(
                    'Displaying results form a sample preloaded data :')
                st.write(df.head(5))

                build_model(df)

    elif option == 'Documentation':
        st.subheader("How to use Notebooker Pro")

    elif option == 'About Us':
        st.subheader("About Us 😊")
        st.markdown(
            '''This web application is made by Mainak Chaudhuri. He is a Computer Science and Engineering student of the SRM University, studying in the second year of B.Tech. The main idea of this application is to help beginners and data science enthusiasts chalk out a plan for preparing a good data science notebook, for college projects, online courses or to add in their portfolio. This application accepts a dataset from the user and displays useful insights about the data. Additionally, it also helps the user visualize the data, choose the best supervised machine learning model (regression & classifaction handled separately) and decide the best suit depending on the dataset size,split and seed values which can be set by the user with the help of the side panel. This application claims to be the first of it's kind ever developed till date by a single developer and also has a serving history and positive reports from 180+ users.
                    
                    
     👉   N.B. : This application is an intellectual property of Mainak Chaudhuri and hence holds a reserved copyright. Any form of illegal immitation of graphics, contents or documentation without prior permission of the owner if proved, can result in legal actions against the plagiarist.'''
        )

        st.success('For more info, feel free to contact @ : ')
        url = 'https://www.linkedin.com/in/mainak-chaudhuri-127898176/'

        if st.button('Mainak Chaudhuri'):
            webbrowser.open_new_tab(url)
# Age used to pick the sub-frame plotted in the weight/height scatter below.
AGE = 36
# Distinct ages in ascending order.
# NOTE(review): AGES must already exist with a .unique() method (presumably a
# pandas Series defined earlier in the file) - confirm.
AGES = np.sort(AGES.unique())
# One filtered view of CSV per age, so a figure can be built by simple lookup.
DATA_AGE = {age: CSV.query("age == @age") for age in AGES}

# ------------------------------------------------------------------------------------
# Main
# ------------------------------------------------------------------------------------

if __name__ == '__main__':

    APP = dash.Dash(__name__)

    FIG_HISTO_1 = px.histogram(CSV,
                               x="age",
                               color="race",
                               title='Age repartition',
                               nbins=20)
    FIG_HISTO_2 = px.histogram(CSV,
                               x="race",
                               color="race",
                               title='Number of offence per race')

    FIG_AGE = px.scatter(DATA_AGE[AGE],
                         x="height (m)",
                         y="weight (Kg)",
                         title='''Weight vs height relation of the offenders
                               ({} years old)'''.format(AGE),
                         color='race',
                         size='age',
                         hover_data=['birth date', 'block'],
Exemplo n.º 22
0
        'text': "<b>Data desbalanceada",
        # 'y':0.928,
        'x': 0.48,
        'yanchor': 'middle',
        'xanchor': 'center'
    },
    # legend=dict(    orientation="h",
    #                             yanchor="bottom",
    #                             y=0.93,
    #                             xanchor="right",
    #                             x=1
    #                         ),
)

# Stacked count histogram of "Ocupacion", one colour per "Target" value.
fig3 = px.histogram(
    df,
    x="Ocupacion",
    color='Target',
    color_discrete_sequence=px.colors.qualitative.T10,
).update_layout(barmode='stack')
fig3.update_layout(
    # autosize=False,
    # width=490,
    yaxis_title_text='<b>Recuento<b>',
    xaxis_title_text='<b>Ocupación<b>',
    # height=390,
    # title='Gráfico de Frecuencias',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    margin=dict(l=53, r=10, b=50, t=50, pad=1),
    title={
        'text': "<b>Ocupación VS Target VD<b>",
        'y': 0.928,
Exemplo n.º 23
0
        x_values = st.sidebar.selectbox('X axis', options=numeric_columns)
        y_values = st.sidebar.selectbox('Y axis', options=numeric_columns)
        color_value = st.sidebar.selectbox("Color", options=non_numeric_columns)
        plot = px.line(data_frame=df, x=x_values, y=y_values, color=color_value)
        st.plotly_chart(plot)
    except Exception as e:
        print(e)

# Histogram: one numeric feature, optional colour grouping, user-chosen bin count.
if chart_select == 'Histogram':
    st.sidebar.subheader("Histogram Settings")
    try:
        x = st.sidebar.selectbox('Feature', options=numeric_columns)
        bin_size = st.sidebar.slider("Number of Bins", min_value=10,
                                     max_value=100, value=40)
        color_value = st.sidebar.selectbox("Color", options=non_numeric_columns)
        # Forward the slider value; without nbins the control has no effect.
        plot = px.histogram(x=x, data_frame=df, color=color_value,
                            nbins=bin_size)
        st.plotly_chart(plot)
    except Exception as e:
        # Best effort: a missing data frame or column list only logs to the console.
        print(e)

# Boxplot: numeric feature on Y, categorical grouping on X.
if chart_select == 'Boxplot':
    st.sidebar.subheader("Boxplot Settings")
    try:
        y = st.sidebar.selectbox("Y axis", options=numeric_columns)
        x = st.sidebar.selectbox("X axis", options=non_numeric_columns)
        color_value = st.sidebar.selectbox("Color", options=non_numeric_columns)
        plot = px.box(data_frame=df, y=y, x=x, color=color_value)
        st.plotly_chart(plot)
    except Exception as e:
        print(e)
        # NOTE(review): balloons are triggered on the failure path only -
        # confirm this is intended.
        st.balloons()
Exemplo n.º 24
0
    st.markdown("%i tweets between %i:00 and %i:00" %
                (len(modified_data), hour, hour + 1))
    st.map(modified_data)
    if st.sidebar.checkbox("Show raw data", False):
        st.write(modified_data)

# Sidebar section: per-airline sentiment breakdown.
st.sidebar.subheader("Breakdown")
# NOTE(review): 'Soutwest' looks like a misspelling; if the airline column
# holds 'Southwest' this option can never match - confirm against the data.
choice = st.sidebar.multiselect(
    'Pick Airlines',
    ('US Airways', 'United', 'American', 'Soutwest', 'Delta'),
    key='0',
)

# Only draw the chart once at least one airline has been picked.
if choice:
    choice_data = data[data.airline.isin(choice)]
    fig_choice = px.histogram(
        choice_data,
        x='airline',
        y='airline_sentiment',
        histfunc='count',
        color='airline_sentiment',
        facet_col='airline_sentiment',
    )
    st.plotly_chart(fig_choice)

# Sidebar section: sentiment selector for the word cloud.
st.sidebar.header("Word Cloud")
word_sentiment = st.sidebar.radio(
    'Display for sentiment',
    ('positive', 'negative', 'neutral'),
)

if not st.sidebar.checkbox('Close', True, key='3'):
    st.header('Word cloud for %s sentiment' % (word_sentiment))
    df = data[data['airline_sentiment'] == word_sentiment]
    words = ' '.join(df['text'])
    processed_words = ' '.join([
        word for word in words.split()
        if 'http' not in word and not word.startswith('@') and word != 'RT'
Exemplo n.º 25
0
def main():
    """Render the "Predict Salary" Streamlit page.

    Collects the candidate attributes through text and number inputs, then
    calls ``func2`` when "Predict Salary" is pressed or ``func1`` when
    "Placed or Not" is pressed and shows the returned value.  Unless
    "Hide Graphs" is ticked, the module-level ``df`` is displayed together
    with one Plotly chart configured from the sidebar.
    """
    st.title("Predict Salary")
    gender = st.text_input("Gender: M/F")
    ssc_p = float(st.number_input("SSC percentile"))
    ssc_b = st.text_input("SSC B:Central/Others")
    hsc_p = float(st.number_input("HSC percentile"))
    hsc_b = st.text_input("HSC B: Central/Others")
    hsc_s = st.text_input("HSC S: Commerce/Science/Arts")
    degree_p = float(st.number_input("Degree percentile"))
    degree_t = st.text_input("Degree T: Sci&Tech/Comm&Mgmt/Others")
    workex = st.text_input("Workex: Yes/No")
    etest_p = float(st.number_input("E-test percentile"))
    special = st.text_input("Special: Mkt&Fin/Mkt&HR")
    mba_p = float(st.number_input("MBA percentile"))
    status = st.text_input("Status: Placed/Not Placed")

    if st.button("Predict Salary"):
        output = func2(gender, ssc_p, ssc_b, hsc_p, hsc_b, hsc_s, degree_p,
                       degree_t, workex, etest_p, special, mba_p, status)
        st.success('Estimated salary: {}'.format(output))
    if st.button("Placed or Not"):
        # Same inputs as above except for the placement status.
        output = func1(gender, ssc_p, ssc_b, hsc_p, hsc_b, hsc_s, degree_p,
                       degree_t, workex, etest_p, special, mba_p)
        st.success('Prediction {}'.format(output))

    agree = st.checkbox("Hide Graphs")
    if agree:
        return

    st.sidebar.subheader("Visualization Settings")
    try:
        st.write(df)
        numeric_columns = list(df.select_dtypes(['float', 'int']).columns)

        non_numeric_columns = list(df.select_dtypes(['object']).columns)

        # None lets the user opt out of colour / category grouping.
        non_numeric_columns.append(None)
        print(numeric_columns)
    except Exception as e:
        # Without a usable df the column lists stay unbound; the chart
        # branches below then fail inside their own try blocks and only log.
        print(e)
        st.write("Please upload file to the application.")

    # add a select widget to the side bar
    chart_select = st.sidebar.selectbox(
        label="Select the chart type",
        options=['Histogram', 'Lineplots', 'Scatterplots', 'Boxplot'])

    if chart_select == 'Scatterplots':
        st.sidebar.subheader("Scatterplot Settings")
        try:
            x_values = st.sidebar.selectbox('X axis',
                                            options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis',
                                            options=numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = px.scatter(data_frame=df,
                              x=x_values,
                              y=y_values,
                              color=color_value)
            # display the chart
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Lineplots':
        st.sidebar.subheader("Line Plot Settings")
        try:
            x_values = st.sidebar.selectbox('X axis',
                                            options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis',
                                            options=numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = px.line(data_frame=df,
                           x=x_values,
                           y=y_values,
                           color=color_value)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Histogram':
        st.sidebar.subheader("Histogram Settings")
        try:
            x = st.sidebar.selectbox('Feature', options=numeric_columns)
            bin_size = st.sidebar.slider("Number of Bins",
                                         min_value=10,
                                         max_value=100,
                                         value=40)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            # Forward the slider value; without nbins the control has no effect.
            plot = px.histogram(x=x,
                                data_frame=df,
                                color=color_value,
                                nbins=bin_size)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Boxplot':
        st.sidebar.subheader("Boxplot Settings")
        try:
            y = st.sidebar.selectbox("Y axis", options=numeric_columns)
            x = st.sidebar.selectbox("X axis", options=non_numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = px.box(data_frame=df, y=y, x=x, color=color_value)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)
Exemplo n.º 26
0
def show(new_df, mode, graph, details, distance):
    """Render summary statistics and charts for the courses in ``new_df``.

    Args:
        new_df: DataFrame of courses; the code reads the columns
            'Tarif horaire', 'Durée', 'Tarif TTC', 'Organisme', 'Nom',
            'Présentiel ou à distance' and 'Ville'.
        mode: view selector; 'global' adds the long/short course split and
            'city' hides the on-site/remote pie chart.
        graph: when truthy, draw the scatter plot and the price histogram.
        details: when truthy, display the full data frame at the end.
        distance: the on-site/remote pie chart is drawn only when this
            equals ``False``.

    The bare triple-quoted strings between statements are expression
    statements, not comments.
    NOTE(review): presumably they are rendered as markdown headings by
    Streamlit's "magic" feature - confirm; their text must stay unchanged.
    """
    with st.spinner(
            'Please wait : the app is preparing data for visualization'):
        st.write('')
        '''
        **Nombre de formations :**
        '''
        st.write(len(new_df))
        '''
        **Tarif horaire moyen des formations :**
        '''
        st.write(np.round(new_df['Tarif horaire'].mean(), 2), '€')
        '''
        **Durée moyenne des formations :**
        '''
        st.write(np.round(new_df['Durée'].mean(), 2), 'heures')
        '''
        **Tarif moyen des formations :**
        '''
        st.write(np.round(new_df['Tarif TTC'].mean(), 2), '€')

        # Global view only: mean hourly rate on either side of 140 hours
        # (courses lasting exactly 140 hours fall in neither group).
        if mode == 'global':
            '''
            **Tarif horaire moyen des formations longues (ie > 140 heures) :**
            '''
            st.write(
                np.round(new_df[new_df['Durée'] > 140]['Tarif horaire'].mean(),
                         2), '€')
            '''
            **Tarif horaire moyen des formations courtes (ie < 140 heures):**
            '''
            st.write(
                np.round(new_df[new_df['Durée'] < 140]['Tarif horaire'].mean(),
                         2), '€')
        '''
        **TOP 50 des organismes proposant le plus de formations  :**
        '''
        # Count the non-null 'Nom' entries per organisation, then keep the
        # 50 largest counts for the bar chart.
        organism = pd.DataFrame(
            new_df.groupby(by='Organisme').count()['Nom'].values,
            index=new_df.groupby(by='Organisme').count()['Nom'].index,
            columns=['Nombre'])
        organisme_50 = organism.sort_values(by='Nombre', ascending=False)[:50]
        fig = px.bar(x=organisme_50.index, y=organisme_50['Nombre'].values)
        st.plotly_chart(fig)

        if mode != 'city' and distance == False:
            '''
            **Répartition présentiel / à distance  :**
            '''
            # Number of rows labelled 'Présentiel' and 'Distance' respectively.
            values = [
                new_df['Présentiel ou à distance']
                [new_df['Présentiel ou à distance'] == 'Présentiel'].count(),
                new_df['Présentiel ou à distance'][
                    new_df['Présentiel ou à distance'] == 'Distance'].count()
            ]
            fig = go.Figure(data=[
                go.Pie(labels=['Présentiel', 'A distance'],
                       values=values,
                       hole=.3)
            ])
            st.plotly_chart(fig)
        '''
        **Résumé statistique :**
        '''
        st.write(new_df.describe())

        if graph:
            '''
            **Tarif horaire selon la durée des formations  :**
            '''
            # Hourly rate (x) against duration (y), with marginal histograms;
            # the x axis is limited to 0..200.
            fig = px.scatter(new_df,
                             x='Tarif horaire',
                             y='Durée',
                             color='Ville',
                             hover_name='Organisme',
                             marginal_y="histogram",
                             marginal_x="histogram",
                             range_x=[0, 200])
            st.plotly_chart(fig)
            '''
            **Distribution des tarifs horaires  :**
            '''
            fig = px.histogram(new_df,
                               x="Tarif horaire",
                               color='Ville',
                               hover_data=new_df.columns,
                               range_x=[0, 200])
            st.plotly_chart(fig)

        if details:
            '''
            **Détails des formations:**
            '''
            st.dataframe(new_df)
# NOTE(review): the "In[...]" markers suggest this section was exported from a
# notebook. Each statement below is a bare expression whose result is not
# stored or shown when the file runs as a plain script - confirm intended use.
gapminder.tail(4)

# ### Simple Visualizations

# ### Bar

# In[196]:

# Population per country, restricted to the rows where year == 2007.
px.bar(data_frame=gapminder[gapminder.year == 2007], x='country', y='pop')

# ### Histogram

# In[197]:

# Distribution of the 'pop' column over all rows.
px.histogram(data_frame=gapminder, x='pop')

# ### Box plot - Normal

# In[198]:

# Single box over every 'lifeExp' value.
px.box(data_frame=gapminder, y='lifeExp')

# ### Box Plot - Colored / Grouped

# In[199]:

# One box per 'continent' value.
px.box(data_frame=gapminder, y='lifeExp', color='continent')

# ### Violin plot
Exemplo n.º 28
0
def main():
    """Render the "Infographs" Streamlit page.

    Collects the offer and customer attributes, calls ``membership_predict``
    when "Predict" is pressed and reports a result of 1 or 0.  Unless
    "Hide Graphs" is ticked, the module-level ``df`` is displayed together
    with one Plotly chart configured from the sidebar.
    """
    st.title("Infographs")

    # NOTE(review): this banner markup is built but never rendered - confirm
    # whether it should be passed to st.markdown or removed.
    html_temp = """
    <div style="background-color:Red;padding:10px">
    <h1 style="color:orange;text-align:center;"><em>Infographs</em> </h1>
    </div>
    <br></br>
    """

    offer = ('Offer-Type-1', 'Offer-Type-2', 'Offer-Type-3', 'Offer-Type-4',
             'Offer-Type-5', 'Offer-Type-6', 'Offer-Type-7', 'Offer-Type-8',
             'Offer-Type-9', 'Offer-Type-10')
    # The select box works on indices and shows the matching label.
    option_offer = list(range(len(offer)))
    offer_type = st.selectbox("Offer_type",
                              option_offer,
                              format_func=lambda x: offer[x])
    offer_type = offer_type + 1  # shift the 0-based index to a 1-based offer number
    gender_box = ('M', 'F', 'O')
    gender = st.selectbox("Gender", options=list(gender_box))
    days_of_membership = st.number_input("Days Of Memebership")
    total_amount = st.number_input("Amount(Bill of the customer)")
    year = st.number_input("year")
    age_in_range = st.slider("Age", 0, 100)
    age_in_range = str(age_in_range) + "s"  # e.g. 30 -> "30s"
    income_in_range = st.text_input("Income Of The Customer")
    result = ""
    if st.button("Predict"):
        result = membership_predict(offer_type, gender, days_of_membership,
                                    total_amount, year, age_in_range,
                                    income_in_range)
        if result == 1:
            st.success("The Customer Will Take Member Ship")
        elif result == 0:
            st.warning("He wont Take")

    agree = st.checkbox("Hide Graphs")
    if agree:
        return

    st.sidebar.subheader("Visualization Settings")
    try:
        st.write(df)
        # Fixed list of numeric features offered for plotting.
        numeric_columns = ['income', 'total_amount', 'days_of_membership']

        non_numeric_columns = list(df.select_dtypes(['object']).columns)

        # None lets the user opt out of colour / category grouping.
        non_numeric_columns.append(None)
        print(numeric_columns)
    except Exception as e:
        # Without a usable df the column lists stay unbound; the chart
        # branches below then fail inside their own try blocks and only log.
        print(e)
        st.write("Please upload file to the application.")

    # add a select widget to the side bar
    chart_select = st.sidebar.selectbox(
        label="Select the chart type",
        options=['Histogram', 'Lineplots', 'Scatterplots', 'Boxplot'])

    if chart_select == 'Scatterplots':
        st.sidebar.subheader("Scatterplot Settings")
        try:
            x_values = st.sidebar.selectbox('X axis',
                                            options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis',
                                            options=numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = px.scatter(data_frame=df,
                              x=x_values,
                              y=y_values,
                              color=color_value)
            # display the chart
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Lineplots':
        st.sidebar.subheader("Line Plot Settings")
        try:
            x_values = st.sidebar.selectbox('X axis',
                                            options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis',
                                            options=numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = px.line(data_frame=df,
                           x=x_values,
                           y=y_values,
                           color=color_value)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Histogram':
        st.sidebar.subheader("Histogram Settings")
        try:
            x = st.sidebar.selectbox('Feature', options=numeric_columns)
            bin_size = st.sidebar.slider("Number of Bins",
                                         min_value=10,
                                         max_value=100,
                                         value=40)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            # Forward the slider value; without nbins the control has no effect.
            plot = px.histogram(x=x,
                                data_frame=df,
                                color=color_value,
                                nbins=bin_size)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)

    elif chart_select == 'Boxplot':
        st.sidebar.subheader("Boxplot Settings")
        try:
            y = st.sidebar.selectbox("Y axis", options=numeric_columns)
            x = st.sidebar.selectbox("X axis", options=non_numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = px.box(data_frame=df, y=y, x=x, color=color_value)
            st.plotly_chart(plot)
        except Exception as e:
            print(e)
import plotly_express as px
import pandas as pd

# Load the iris measurements from the CSV file next to the project.
iris_df = pd.read_csv("../data/iris.csv")

# Quick sanity check of the first rows on the console.
print(iris_df.head())

# 30-bin histogram of the 'SepalLengthCm' column.
basic_histogram = px.histogram(
    data_frame=iris_df,
    x='SepalLengthCm',
    nbins=30,
    title="Histogram plot of Sepal Length",
)

basic_histogram.show()
Exemplo n.º 30
0
    # Keep data in sample
    df = df[(df['Date'] > '2010-01-01')].reset_index(drop=True)

    # Timedelta of injuries
    df[['Date','injury_end']] = df[['Date','injury_end']].apply(pd.to_datetime)  # if conversion required
    df['injury_duration'] = (df['injury_end'] - df['Date']).dt.days

    df = df[(df['injury_duration'] <= 365)]
    return df


# Run the in-sample injury records through pre_process().
injury_working = pre_process(injury_in_sample)

# Plot a histogram of injury duration
fig = px.histogram(
    injury_working,
    x="injury_duration",
    title="Histogram of DL / IL stint length since 2010",
)
fig.show()


def grouping(df):
    """Aggregate injury stints to one row per season and player.

    A ``Year`` column is written to ``df`` as the calendar year of ``Date``
    plus one (season aligned with the year of FA), so the caller's frame is
    modified in place.  For every (Year, Player) pair the result holds the
    summed ``injury_duration`` and the set of distinct ``Injury`` values.

    Args:
        df: DataFrame with the columns 'Player', 'Date' (datetime),
            'Injury' and 'injury_duration'.

    Returns:
        DataFrame with the columns 'Year', 'Player', 'injury_duration'
        and 'Injury'.
    """
    # Create the season key first: both aggregations below group on it, and
    # they must use the same value for the final merge to line up.
    df['Year'] = df['Date'].dt.year + 1

    # Distinct injuries per player and season.
    injuries_array = df.pivot_table(index=['Player', 'Year'],
                                    values='Injury',
                                    aggfunc=lambda injuries: set(injuries),
                                    margins=False).reset_index()

    # Total days lost per player and season; the string alias selects the
    # pandas reduction directly instead of passing the builtin sum.
    totals = df.groupby(['Year', 'Player']).agg({
        'injury_duration': 'sum',
    }).reset_index()

    return pd.merge(totals, injuries_array, on=['Player', 'Year'], how='left')