import plotly.express as px

# Build a bar chart with no data, fixed to a width of 400, and display it.
fig = px.bar(width=400)
fig.show()
Пример #2
0
    else:
        date_options[weeks] = date_item.strftime("%b %d")
    weeks = weeks + 1

#Pick a default date
# Read three values of the 'date' column through the frame's index labels:
# the third-from-last row, the first row and the last row.
inprogress = data_US['date'][data_US.index[-3]]
first_date = data_US['date'][data_US.index[0]]
last_date = data_US['date'][data_US.index[-1]]

#Set up the charts
import plotly.express as px
import plotly.graph_objects as go

#fig = go.Figure() # or any Plotly Express function e.g. px.bar(...)
# Start from empty figures; no data is attached to them in this section.
fig = px.choropleth()
bar1 = px.bar()
bar2 = px.bar()

# Seventeen hex colours running from a near-white blue to a dark navy.
colorscale = [
    "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
    "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be",
    "#2171b5", "#1361a9", "#08519c", "#0b4083", "#08306b"
]  # or px.colors.sequential.Plasma

#Style the charts
# Plotly config dict that turns the mode bar off; it is only defined here —
# presumably handed to the graph component elsewhere (TODO confirm).
plotcfg = {'displayModeBar': False}
#fig1.update_xaxes(rangeslider_visible=True)
# Fixed height, zero margins, a "Z-score" colour-bar title and a white
# plot background for the choropleth.
fig.update_layout(height=500,
                  margin=dict(l=0, r=0, b=0, t=0, pad=0),
                  coloraxis_colorbar=dict(title="Z-score"),
                  plot_bgcolor='rgb(255,255,255)')
import plotly.express as px

# Build a bar chart with no data or options and display it.
fig = px.bar()
fig.show()
Пример #4
0
    2016: '2016',
    2017: '2017',
    2018: '2018',
    2019: '2019'
}

# ZIP codes as a list of strings, used as the lookup keys below.
c = list(df["ZIP CODE"].astype(str))
nomi = pgeocode.Nominatim('us')
# Copy the latitude/longitude columns of the lookup result into df.
df[["latitude",
    "longitude"]] = nomi.query_postal_code(c)[["latitude", "longitude"]]
# Per-row label: "<ADDRESS 1> <ZIP CODE> payment: <payments>".
df['detail'] = df["ADDRESS 1"] + " " + df['ZIP CODE'].astype(
    str) + ' payment: ' + df['payments'].astype(str)

# Grouped bars of funded_amount per year, one colour per purpose.
# year_fund is defined outside this section.
fig1 = px.bar(year_fund,
              x='year',
              y='funded_amount',
              color="purpose",
              barmode="group")
fig1.update_layout(title='Total Funded Amount based on Need in every years')
# Map of the geocoded rows, coloured by building class category and
# labelled with the 'detail' text built above.
fig2 = px.scatter_mapbox(df,
                         lat="latitude",
                         lon="longitude",
                         color='BUILDING CLASS CATEGORY',
                         text=df['detail'],
                         zoom=9.5,
                         mapbox_style='open-street-map')

fig2.update_layout(title='New Yokers Payment based on Building Class Category')

app.layout = html.Div([
    dcc.Graph(id='graph1', figure=fig1),
    ]].sample(n=1).iat[0, 0])

# --- Sidebar controls and chart: number of tweets per sentiment ---
st.sidebar.subheader('Number of Tweets by Sentiment')
# Explicit keys keep the two identically-labelled checkboxes distinct.
show_hide1 = st.sidebar.checkbox('Show Vizualization', True, key='1')
viz_select = st.sidebar.selectbox('Vizualization Type',
                                  ['Histogram', 'Pie Chart'])
# Count rows per sentiment value, then reshape the counts into a two-column
# frame ('Sentiment', 'Tweets') for plotting.
sentiment_count = data['airline_sentiment'].value_counts()
sentiment_count = pd.DataFrame({
    'Sentiment': sentiment_count.index,
    'Tweets': sentiment_count.values
})

if show_hide1:
    st.subheader('Number of Tweets by Sentiment')
    # The 'Histogram' option is drawn as a bar chart of the counts.
    if viz_select == 'Histogram':
        fig = px.bar(sentiment_count, x='Sentiment', y='Tweets', height=500)
        st.plotly_chart(fig)
    elif viz_select == 'Pie Chart':
        fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
        st.plotly_chart(fig)

# --- Sidebar controls: tweets filtered by hour of day ---
st.sidebar.subheader('When and Where are Users Tweeting from?')
show_hide2 = st.sidebar.checkbox('Show Vizualization', True, key='2')
select_hour = st.sidebar.slider('Hour of Day', 0, 23)
select_raw_data = st.sidebar.checkbox('Show Raw Data', False)
# Keep only the rows whose 'tweet_created' hour equals the slider value.
selected_data = data[data['tweet_created'].dt.hour == select_hour]

if show_hide2:
    st.subheader('When and Where are Users Tweeting from?')
    # The end of the range wraps around midnight (23 -> 0).
    st.markdown('%i tweets between %i:00 and %i:00' %
                (len(selected_data), select_hour, (select_hour + 1) % 24))
Пример #6
0
        radius=100,
        extruded=True,
        pickable=True,
        elevation_scale=4,
        elevation_range=[0, 1000],
        ),
    ],
))

# --- Per-minute crash histogram for the hour held in `hour` ---
# `hour`, `data` and `original_data` are defined outside this section.
st.subheader("Breakdown by minute between %i:00 and %i:00" % (hour, (hour + 1) % 24))
filtered = data[
    (data['date/time'].dt.hour >= hour) & (data['date/time'].dt.hour < (hour + 1))
]
# 60 bins over [0, 60); index [0] keeps the counts and drops the bin edges.
hist = np.histogram(filtered['date/time'].dt.minute, bins=60, range=(0, 60))[0]
chart_data = pd.DataFrame({"minute": range(60), "crashes": hist})
fig = px.bar(chart_data, x='minute', y='crashes', hover_data=['minute', 'crashes'], height=400)
st.write(fig)

# --- Top five streets by injuries for the selected class ---
st.header("Top 5 dangerous streets by affected class")
select = st.selectbox('Affected class', ['Pedestrians', 'Cyclists', 'Motorists'])

# Each branch runs the same pipeline on a different injury column: keep rows
# with at least one injury, sort descending, drop rows with missing values,
# and show the first five.
if select == 'Pedestrians':
    st.write(original_data.query("injured_pedestrians >= 1")[["on_street_name", "injured_pedestrians"]].sort_values(by=['injured_pedestrians'], ascending=False).dropna(how="any")[:5])

elif select == 'Cyclists':
    st.write(original_data.query("injured_cyclists >= 1")[["on_street_name", "injured_cyclists"]].sort_values(by=['injured_cyclists'], ascending=False).dropna(how="any")[:5])

else:
    st.write(original_data.query("injured_motorists >= 1")[["on_street_name", "injured_motorists"]].sort_values(by=['injured_motorists'], ascending=False).dropna(how="any")[:5])

def main():
    """Render the Streamlit app: a sidebar menu with Home, Login and Signup.

    * Home shows the current date/time and the table returned by
      ``view_all_info()``.
    * Login checks the sidebar credentials with ``login_user`` and, on
      success, offers three tasks: "UPDATE" (add or clear today's traffic
      rows), "Data Set" (browse the loaded data) and "Visualise" (map,
      hexagon layer, per-minute histogram and top-5 street tables).
    * Signup stores a new username/password pair via ``add_userdata``.

    All helpers called here (``homepagebck``, ``view_all_info``,
    ``login_user``, ``load_data`` ...) are defined outside this function.
    """
    menu = ['Home', 'Login', 'Signup']
    st.sidebar.title("MENU")
    choice = st.sidebar.selectbox("Menu", menu)
    # Inject raw CSS to style the sidebar; unsafe_allow_html is needed for
    # the <style> tag to be emitted as HTML.
    st.markdown(
        """
    <style>
    .sidebar .sidebar-content {
        background: url(https://images.pexels.com/photos/1111316/pexels-photo-1111316.jpeg?auto=compress&cs=tinysrgb&dpr=1&w=500);
        color: white;
    }
    </style>
    """,
        unsafe_allow_html=True,
    )
    #st.sidebar.button("GOKU")

    if (choice == "Home"):
        st.title("Welcome To the HomePage")
        # presumably sets the page background — defined elsewhere, TODO confirm
        homepagebck()

        #[[thisisanimagelink](upload://7FxfXwDqJIZdYJ2QYADywvNRjB.png)](https://data.cityofnewyork.us/Public-Safety/Motor-Vehicle-Collisions-Crashes/h9gi-nx95/data)
        st.subheader("Latest Traffic updates")
        # NOTE(review): a datetime object (not a string) is passed to
        # st.subheader below.
        date = datetime.datetime.now()
        st.subheader(date)
        # Show every stored traffic row as a three-column table.
        query_view = view_all_info()
        clean_db = pd.DataFrame(
            query_view, columns=["LOCATION", "STREET NAME", "TYPE OF TRAFFIC"])
        st.dataframe(clean_db)
        #show data and ...
        #st.write("https://data.cityofnewyork.us/Public-Safety/Motor-Vehicle-Collisions-Crashes/h9gi-nx95/data")

    elif (choice == "Login"):
        st.header("Login Section")

        # if (st.sidebar.button("veronica")):
        #     deleteUserNull()
        loginpagebck()

        username = st.sidebar.text_input("Username")
        password = st.sidebar.text_input("Password", type='password')
        # A checkbox (not a button) is used, so its value persists across
        # Streamlit reruns while it stays ticked.
        if (st.sidebar.checkbox("Login")):

            create_usertable()
            result = login_user(username, password)
            # A truthy result is treated as a successful login.
            if (result):

                st.success("Logged in as {}".format(username))

                task = st.selectbox("Task", [
                    "UPDATE",
                    "Data Set",
                    "Visualise",
                ])
                #update column of login page
                if (task == "UPDATE"):
                    st.subheader(
                        "Add current traffic update here and see that table")

                    create_todaytraffic()

                    borough = st.text_input("enter the borough")
                    on_street_name = st.text_input("enter the on_street_name")
                    # NOTE(review): `type` shadows the builtin of the same
                    # name for the rest of this function.
                    type = st.text_input("enter the type")
                    #update button in updateLoginpage
                    if (st.button("UPDATE")):
                        # Store the new row, then re-read and show the table.
                        add_trafficdata(borough, on_street_name, type)
                        query_view = view_all_info()
                        clean_db = pd.DataFrame(query_view,
                                                columns=[
                                                    "LOCATION", "STREET NAME",
                                                    "TYPE OF TRAFFIC"
                                                ])
                        st.dataframe(clean_db)
                    if (st.button("Drop all from today")):
                        # deleteNull() is defined elsewhere; the table is
                        # re-read and shown afterwards.
                        deleteNull()
                        query_view = view_all_info()
                        clean_db = pd.DataFrame(query_view,
                                                columns=[
                                                    "LOCATION", "STREET NAME",
                                                    "TYPE OF TRAFFIC"
                                                ])
                        st.dataframe(clean_db)

                elif (task == "Data Set"):
                    st.subheader("Data Set from Uber about Newyork City")
                    #add set here

                    # NOTE(review): assigned but not referenced again inside
                    # this function.
                    DATA_URL = (
                        " https://data.cityofnewyork.us/Public-Safety/Motor-Vehicle-Collisions-Crashes/h9gi-nx95/data "
                    )

                    st.markdown(
                        "This application is a streamlit dashboared that can "
                        "be used to analyse motor vhicle collision in nyc")

                    # presumably loads up to 100000 rows — load_data is
                    # defined elsewhere, TODO confirm
                    data = load_data(100000)
                    if (st.checkbox("data head", False)):
                        st.write(data.head())
                        st.info("Successfully Loaded Data Frame Head")
                    if st.checkbox("Show Raw Data", False):
                        st.subheader('Raw data')
                        st.info("Note That The Dataset is very large")
                        st.write(data)
                    # if(st.button("Visit Column Name")):
                    #     for col in data.columns:
                    #         st.write(col)

                elif (task == "Visualise"):
                    #st.subheader("Edith")

                    #add plots here
                    # NOTE(review): assigned but not referenced again inside
                    # this function.
                    DATA_URL = (
                        " https://data.cityofnewyork.us/Public-Safety/Motor-Vehicle-Collisions-Crashes/h9gi-nx95/data "
                    )

                    st.markdown(
                        "This application is a streamlit dashboared that can "
                        "be used to analyse motor vhicle collision in nyc")

                    data = load_data(100000)
                    # Keep a reference to the unfiltered frame; `data` is
                    # narrowed to a single hour further down.
                    original_data = data
                    if (st.checkbox("data head", False)):
                        st.write(data.head())
                        st.info("Successfully Loaded Data Frame Head")
                    if st.checkbox("Show Raw Data", False):
                        st.subheader('Raw data')
                        st.info("Note That The Dataset is very large")
                        st.write(data)
                    st.header("Where are he most people injured in NYC?")
                    injured_people = st.slider(
                        "Number of persons injured in vehicle collisions", 0,
                        19)
                    # Map the rows with at least `injured_people` injuries;
                    # rows missing a coordinate are dropped first.
                    st.map(
                        data.query("injured_persons >= @injured_people")[[
                            "latitude", "longitude"
                        ]].dropna(how="any"))

                    st.header(
                        "How many collisions occured during a given time of day?"
                    )
                    hour = st.slider("Hour to look at", 0, 23)
                    # From here on `data` only holds rows for the chosen hour.
                    data = data[data['crash_date_crash_time'].dt.hour == hour]

                    st.markdown("Vehicle Collisions between %i:00 and %i:00" %
                                (hour, (hour + 1) % 24))

                    # Centre of the initial map view: mean latitude and
                    # longitude of the filtered rows.
                    midpoint = (np.average(data['latitude']),
                                np.average(data['longitude']))

                    st.write(
                        pdk.Deck(
                            map_style="mapbox://styles/mapbox/light-v9",
                            initial_view_state={
                                "latitude": midpoint[0],
                                "longitude": midpoint[1],
                                "zoom": 11,
                                "pitch": 50,
                            },
                            layers=[
                                pdk.Layer(
                                    "HexagonLayer",
                                    data=data[[
                                        'crash_date_crash_time', 'latitude',
                                        'longitude'
                                    ]],
                                    get_position=['longitude', 'latitude'],
                                    radius=100,
                                    extruded=True,
                                    pickable=True,
                                    elevation_range=[0, 1000],
                                ),
                            ],
                        ))

                    st.subheader(
                        "Breakdown by minute between %i:00 and %i:00" %
                        (hour, (hour + 1) % 24))
                    # `data` was already restricted to `hour` above, so this
                    # range filter selects the same rows again.
                    filtered = data[
                        (data['crash_date_crash_time'].dt.hour >= hour)
                        & (data['crash_date_crash_time'].dt.hour < (hour + 1))]
                    # 60 bins over [0, 60); [0] keeps the counts only.
                    hist = np.histogram(
                        filtered['crash_date_crash_time'].dt.minute,
                        bins=60,
                        range=(0, 60))[0]
                    chart_data = pd.DataFrame({
                        'minute': range(60),
                        'crashes': hist
                    })
                    fig = px.bar(chart_data,
                                 x='minute',
                                 y='crashes',
                                 hover_data=['minute', 'crashes'],
                                 height=400)
                    st.write(fig)

                    st.header("Top 5 dangerous streets by affected type")
                    st.markdown(
                        "based on visualisation and data interpretation")
                    select = st.selectbox(
                        'Affected type of people',
                        ['Pedestrians', 'Cyclists', 'Motorists'])

                    # The three branches differ only in the injury column.
                    # They use the unfiltered frame, so the ranking does not
                    # depend on the hour slider.
                    if (select == 'Pedestrians'):
                        st.write(
                            original_data.query("injured_pedestrians >= 1")[[
                                "on_street_name", "injured_pedestrians"
                            ]].sort_values(
                                by=['injured_pedestrians'],
                                ascending=False).dropna(how="any")[:5])

                    elif (select == 'Cyclists'):
                        st.write(
                            original_data.query("injured_cyclists >= 1")[[
                                "on_street_name", "injured_cyclists"
                            ]].sort_values(
                                by=['injured_cyclists'],
                                ascending=False).dropna(how="any")[:5])

                    else:
                        st.write(
                            original_data.query("injured_motorists >= 1")[[
                                "on_street_name", "injured_motorists"
                            ]].sort_values(
                                by=['injured_motorists'],
                                ascending=False).dropna(how="any")[:5])

            else:
                st.warning("Incorrect Username/Password")
                st.markdown("Enter correct Username/Password and try again")
                st.info("Go in sidebar to Signup for free")

    elif (choice == "Signup"):
        signuppagebck()
        # background for signup pages
        #https://images.pexels.com/photos/399161/pexels-photo-399161.jpeg?cs=srgb&dl=pexels-lumn-399161.jpg&fm=jpg
        st.subheader("Create New Account")
        new_user = st.text_input("Username")
        new_password = st.text_input("Password", type="password")

        if (st.button("Signup")):
            # NOTE(review): the password is handed to add_userdata exactly as
            # typed; whether it is hashed before storage is not visible here.
            create_usertable()
            add_userdata(new_user, new_password)
            st.success("You Have Succesfully Created a Valid Account")
            st.info("Go in to Login Menu to login ")
Пример #8
0
# Optional previews of the listing data, each toggled by its own checkbox.
if st.checkbox("Show Raw Data"):
    st.dataframe(df_listing.head())

if st.checkbox('Show Data Statistics'):
    st.dataframe(df_listing.describe())

geodata = load_geojson_data()


st.header("Listing Locations")

# Per-neighbourhood averages. numeric_only=True restricts the mean to numeric
# columns; without it pandas >= 2.0 raises a TypeError as soon as the frame
# contains a text column, while older versions silently dropped such columns.
df_nbh = df_listing.groupby('neighbourhood_cleansed').mean(numeric_only=True)

st.markdown('We are interested in the popular areas of SF, from the chart we could observe that *Seacliff* and *Maria* are with higher average price. If the trip is budgeted, we could look into other neighbourhoods like *Crocker Amazon*.')

# Average price per neighbourhood, highest first, one colour per bar.
bar = px.bar(df_nbh['price'].sort_values(ascending=False).reset_index(), x="neighbourhood_cleansed", y='price', title="Average price of each neighbourhood in SF", color = 'neighbourhood_cleansed')
bar.update_xaxes(title="Neighbourhood")
bar.update_yaxes(title="Price")
st.plotly_chart(bar)
#print(df_nbh)

# Attach the per-neighbourhood averages to the GeoJSON frame; right_on matches
# the index name that groupby gave df_nbh.
stats_nbh = geodata.merge(df_nbh, left_on = 'neighbourhood', right_on = 'neighbourhood_cleansed')
print(stats_nbh['price'])

# midpoint = (np.average(df_listing['longitude']), np.average(df_listing['latitude']))

# Left join of every listing onto its neighbourhood (how='left',
# on='neighbourhood', passed positionally).
stats = geodata.merge(df_listing, 'left', 'neighbourhood')



room_types = list(df_listing['room_type'].unique())
def main():
    """Load player data from MongoDB via Spark, aggregate it and plot it.

    Steps, in order:
      1. Read the ``players``, ``players2`` and ``countryContinent``
         collections.
      2. Select player attributes from ``players2``, replace each nationality
         with its continent, and encode ``body_type`` and ``continent`` as
         numbers.
      3. Aggregate average weight per continent, player count per
         continent/body type, and average age per club.
      4. Write the cleaned frame to ``cleaned.csv`` and show two horizontal
         Plotly bar charts.
      5. Read ``cleaned.csv`` back and show an annotated correlation heatmap.

    ``spark``, ``functions``, ``px``, ``pd``, ``plt`` and ``ff`` come from
    module scope outside this function.
    """
    # NOTE(review): players_data is loaded but not used again below.
    players_data = spark.read.format("mongo").options(
        collection='players').load()
    players2 = spark.read.format("mongo").options(collection='players2').load()
    cc = spark.read.format("mongo").options(
        collection='countryContinent').load()

    cc = cc.select('country', 'continent')

    ## 2 columns missing nationality and body type

    players_a = players2.select('age', 'height_cm', 'weight_kg', 'nationality',
                                'club', 'overall', 'potential', 'body_type',
                                'pace', 'physic', 'movement_agility',
                                'power_stamina', 'mentality_aggression')
    # 'age','height','weight','nationality','club',body type','overall rating','potential','pace','physique','movement      agility','stamina','aggression'
    # Build a nationality -> continent lookup from the distinct nationalities,
    # then join it onto the players and drop the nationality column.
    nationalities_agg = players2.select('nationality').distinct()
    nationalities_agg = nationalities_agg.join(
        cc, nationalities_agg['nationality'] == cc['country']).drop('country')
    players_a = players_a.join(
        nationalities_agg, players_a['nationality'] ==
        nationalities_agg['nationality']).drop('nationality')
    # Despite its name, country_list maps each distinct *continent* value to
    # a string code "1", "2", ... in collect() order.
    nationalities_agg = players_a.select('continent').distinct()
    l = nationalities_agg.collect()
    country_list = {}
    for i in range(0, len(l)):
        country_list.update({l[i][0]: str(i + 1)})
    # country_list

    # Encode body type as Lean=3, Normal=2, Stocky=1 and anything else as 0.
    players_a = players_a.withColumn('body_type', \
                                     functions.when((functions.col('body_type') == 'Lean'), 3) \
                                     .when((functions.col('body_type') == 'Normal'), 2) \
                                     .when((functions.col('body_type') == 'Stocky'), 1) \
                                     .otherwise(0))

    # Swap continent names for their string codes in the 'continent' column.
    players_a = players_a.replace(country_list, 1, 'continent')
    players_a.show(3)

    # Average weight per continent.
    players_b = players_a.select(players_a['weight_kg'],
                                 players_a['continent'], players_a['club'])
    players_b = players_b.groupBy('continent').agg(
        functions.avg('weight_kg').alias('average weight')).orderBy(
            'continent')
    # Number of players per (continent, body type).
    players_c = players_a.select(players_a['continent'],
                                 players_a['body_type'], players_a['club'])
    players_c = players_c.groupBy('continent', 'body_type').agg(
        functions.count('body_type').alias('number of players')).orderBy(
            'continent')
    # Decode body type back to its label; the 0 bucket becomes the string
    # 'null' and is filtered out on the next statement.
    players_c = players_c.withColumn('body_type', \
                                     functions.when((functions.col('body_type') == 3), 'Lean') \
                                     .when((functions.col('body_type') == 2), 'Normal') \
                                     .when((functions.col('body_type') == 1), 'Stocky') \
                                     .otherwise('null'))
    players_c = players_c.filter(players_c['body_type'] != 'null')

    # Average age per club, youngest first; only printed, not plotted.
    players_d = players_a.select('age', 'club').groupBy('club').agg(
        functions.avg('age').alias('average_age')).orderBy('average_age')
    players_d.show(truncate=False)

    # Persist the cleaned frame; it is read back for the heatmap below.
    players_a = players_a.toPandas()
    players_a.to_csv('cleaned.csv', index=False)

    # Reverse mapping (code -> continent name) so the charts show names.
    inv_map = {v: k for k, v in country_list.items()}
    players_b = players_b.replace(inv_map, 1, 'continent')

    p = players_b.toPandas()
    fig = px.bar(p,
                 x="average weight",
                 y="continent",
                 color="continent",
                 orientation='h',
                 height=400)
    fig.show()

    players_c = players_c.replace(inv_map, 1, 'continent')
    players_c.show()
    players_c = players_c.toPandas()

    fig = px.bar(players_c,
                 x="number of players",
                 y="continent",
                 color="body_type",
                 orientation='h',
                 height=400)
    fig.show()

    # ax = sns.catplot(x="count", y="continent", hue="body_weight", kind="bar", data=players_c)
    # ax.set(xlabel='Number of Players', ylabel='Continent')

    # sphinx_gallery_thumbnail_number = 2

    df = pd.read_csv('cleaned.csv')
    # Drops the first data row of the CSV (the header is already consumed by
    # read_csv). NOTE(review): confirm this is intended.
    df = df[1:]

    # NOTE(review): `labels` is not used again; the same names are repeated
    # inline in the heatmap call below.
    labels = [
        'age', 'height', 'weight', 'nationality', 'body type',
        'overall rating', 'potential', 'pace', 'physique', 'movement agility',
        'stamina', 'aggression'
    ]

    # NOTE(review): cleaned.csv still contains the text column 'club';
    # presumably this relies on corr() skipping non-numeric columns, which
    # newer pandas versions no longer do by default — confirm.
    corr = df.corr().to_numpy().round(2)

    # NOTE(review): this matplotlib figure and `im` are not used afterwards;
    # `fig` is rebound by the heatmap call right below.
    fig, ax = plt.subplots(figsize=(8, 8))
    im = ax.imshow(corr)  # possible: jet

    # NOTE(review): the axis labels are hard-coded and not derived from the
    # columns of `corr`; 'continent' appears to be joined on as the last
    # column while 'nationality' is listed fourth, so labels may not line up
    # with the matrix — verify against the actual column order.
    fig = ff.create_annotated_heatmap(
        z=corr,
        annotation_text=corr,
        x=[
            'age', 'height', 'weight', 'nationality', 'body type',
            'overall rating', 'potential', 'pace', 'physique',
            'movement agility', 'stamina', 'aggression'
        ],
        y=[
            'age', 'height', 'weight', 'nationality', 'body type',
            'overall rating', 'potential', 'pace', 'physique',
            'movement agility', 'stamina', 'aggression'
        ],
        hoverongaps=False,
        colorscale='Viridis',
        hoverinfo='z')
    fig.show()
Пример #10
0
                                                    ascending=False)

##### Chart creation


def figures_to_html(figs, filename="20paisescontagios.html"):
    """Write several figures into one standalone HTML page.

    Args:
        figs: Iterable of objects whose ``to_html()`` returns a full HTML
            document. Only the markup between ``<body>`` and ``</body>`` of
            each document is kept, in iteration order.
        filename: Path of the HTML file to create; an existing file is
            overwritten.
    """
    # The context manager flushes and closes the file even when a figure
    # fails to render part-way through; the handle used to be left open.
    with open(filename, 'w') as dashboard:
        dashboard.write("<html><head></head><body>" + "\n")
        for fig in figs:
            inner_html = fig.to_html().split('<body>')[1].split('</body>')[0]
            dashboard.write(inner_html)
        dashboard.write("</body></html>" + "\n")


# Bar chart of total_cases per location for the first 20 rows of
# df_order_total_cases (defined outside this section).
fig1 = px.bar(df_order_total_cases.head(20),
              x='location',
              y='total_cases',
              barmode='relative')

# Write the chart to the function's default output file.
figures_to_html([fig1])


def figures_to_html(figs, filename="20paisesmuertes.html"):
    """Write several figures into one standalone HTML page.

    Args:
        figs: Iterable of objects whose ``to_html()`` returns a full HTML
            document. Only the markup between ``<body>`` and ``</body>`` of
            each document is kept, in iteration order.
        filename: Path of the HTML file to create; an existing file is
            overwritten.
    """
    # The context manager flushes and closes the file even when a figure
    # fails to render part-way through; the handle used to be left open.
    with open(filename, 'w') as dashboard:
        dashboard.write("<html><head></head><body>" + "\n")
        for fig in figs:
            inner_html = fig.to_html().split('<body>')[1].split('</body>')[0]
            dashboard.write(inner_html)
        dashboard.write("</body></html>" + "\n")


fig1 = px.bar(df_order_total_death.head(20),
Пример #11
0
# ref: https://dash.plotly.com/introduction
# ref: https://plotly.com/python/
# ref: bootstrap-crash-course
# ref: https://getbootstrap.com/docs/4.4/getting-started/introduction/

import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go
import plotly.express as px
import dash_bootstrap_components as dbc

# Dash application styled with the Bootstrap theme from
# dash_bootstrap_components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Gapminder rows for Canada, plotted twice as population per year: once with
# the 'ggplot2' template and once with 'plotly_dark'.
data_canada = px.data.gapminder().query("country == 'Canada'")
fig_1 = px.bar(data_frame=data_canada, x='year', y='pop', template='ggplot2')
fig_2 = px.bar(data_frame=data_canada,
               x='year',
               y='pop',
               template='plotly_dark')
navbar = dbc.NavbarSimple(
    children=[
        dbc.NavItem(dbc.NavLink("Page 1", href="#")),
        dbc.DropdownMenu(
            children=[
                dbc.DropdownMenuItem("More pages", header=True),
                dbc.DropdownMenuItem("Page 2", href="#"),
                dbc.DropdownMenuItem("Page 3", href="#"),
            ],
            nav=True,
Пример #12
0
    st.write("Hank: That really makes sense. Thank you a lot bro! I think I can simply choose CS:GO as the start of my career since I can win tons of money from these tournaments.")
    st.write("Sean: Not so hurry boi. If you are interested in money, then let’s talk about the sexiest. Look at this chart, it shows the actual prize pool each game has every year. Dota2 is the most generous and they gave out over 30M dollars!")


# Axis settings applied to the x axis of the prize-pool chart below.
xaxis_config = {
        'title':'Game'
        }


# `get_data`, `years`, `title_config` and `show` are defined outside this
# section.
prize_df = get_data('prize')[['Name','Total Prize Pool','Year']]
st.markdown('---')
st.header("Let the Money Talk")
values = st.selectbox("Select Year ",years)
# where() blanks out the rows whose 'Year' differs from the selection (the
# comparison is against the string form of the selection); dropna() then
# removes every row that contains a missing value.
selected_df = prize_df.where(prize_df['Year']==str(values))
selected_df = selected_df.dropna()
f = px.bar(selected_df, x="Name", y = 'Total Prize Pool',title = 'Total Prize Pool',color='Name')
# The title passed to px.bar above is replaced by title_config here.
f.update_layout(
    title=title_config,
    yaxis={
        "title":'Total Prize',
        'rangemode':"tozero"
    },
    xaxis=xaxis_config,
    )
st.plotly_chart(f)

# Show the radar chart
# Only the dialogue text is written in this section.
if show:
    st.write("Hank: Wow! No wonder why professional Dota gamers like Puppey are so rich! But, wait, this is really confusing. Dota2 has the least tournaments but it has the largest rewards? Should I be a professional Dota2 player then?")
    st.write("Sean: Well, that also means it can be quite competitive. To really make the best decision, we need a bigger picture! Let’s look at this radar chart I made for you! So this radar chart has five aspects, Player growth, Average earning, Professional rate(It’s basically how hard it is to become a professional player), Total prize each year, and Total tournaments. The larger the number, the better for you.")
    st.write("Hank: Wow. This chart really helps me a lot. But I still need to spend more time considering my option. ")
Пример #13
0
def bar_cases_provinces():
    """Return a bar chart of CASES per DATE, coloured by PROVINCE.

    The data comes from the module-level ``df``; the figure uses the
    ``plotly_white`` template.
    """
    # update_layout returns the figure it was called on, so the call chains.
    return px.bar(
        df,
        x="DATE",
        y="CASES",
        color="PROVINCE",
    ).update_layout(template="plotly_white")
Пример #14
0
#Figures created for the dashboard
# Geographic scatter of the rows in df_final: one marker per row, labelled
# with 'content' and coloured by 'size'.
fig1 = go.Figure(data=go.Scattergeo(
    lon=df_final['lon'],
    lat=df_final['lat'],
    mode='markers',
    text=df_final['content'],
    marker_color=df_final['size'],
))
fig1.update_layout(title="Shootings Reported per Location",
                   geo_scope='south america',
                   height=800)

# Bar chart of the first ten entries of `counts`: index labels on the x axis,
# values on the y axis, one colour per index label.
fig2 = px.bar(counts[:10],
              x=counts[:10].index,
              y=counts[:10],
              labels={
                  'x': 'Neighborhoods',
                  'y': "Shooting's count"
              },
              color=counts[:10].index)
fig2.update_layout(title="Shootings reported by neighborhoods in RJ")


#Prepare the data for the 3rd figure
def prepare_df(df_x, name_nb='Belford Roxo'):
    #Function created to return a dataframe ready for plotting.
    #2 entry variables: original dataframe and neighborhood to filter by
    df_fig = df_x.copy()
    df_fig = df_fig.loc[df_fig['content'] == name_nb]
    df_fig['date'] = df_fig['date'].str[:11]
    df_fig = df_fig.sort_values(by=['date'])
    list_vals = [x for x in range(df_fig.shape[0])
def country_detail(request):
    """Render the per-country COVID-19 detail page.

    Reads the ``country`` field from ``request.POST`` and resolves it against
    the country list returned by the trackcorona API. On a match,
    ``rest_state.html`` is rendered with headline totals, a table of up to 200
    cities/states, a bar chart of the 30 most affected ones and a folium map.
    Without a match the template is rendered with only ``check=False``.

    Args:
        request: Django request whose POST data contains ``country``.

    Returns:
        The response produced by ``render``.
    """
    country = request.POST['country']
    query = country.lower()

    r = requests.get('https://www.trackcorona.live/api/countries')
    df_coun = pd.DataFrame(r.json()['data'])

    check = False
    write = False
    country_code = ''

    # The input is matched when it is a substring of one of the aliases, so
    # partial input such as "brit" resolves to the UK.
    # NOTE(review): an empty input is a substring of everything and therefore
    # also resolves to the UK — confirm whether that is intended.
    uk_aliases = ('uk', 'united kingdom', 'england', 'britain')
    us_aliases = ('america', 'united states of america', 'usa')

    if any(query in alias for alias in uk_aliases):
        check = True
        temp = df_coun[df_coun['location'] == 'UK']
        country = temp['location'].iloc[0]
        country_code = temp['country_code'].iloc[0]

    elif any(query in alias for alias in us_aliases):
        check = True
        temp = df_coun[df_coun['location'] == 'United States']
        country = temp['location'].iloc[0]
        country_code = temp['country_code'].iloc[0]

    else:
        # Here the direction is reversed: the input must be contained in the
        # location name. The loop is not stopped early, so the last matching
        # row wins, as before.
        for location, code in zip(df_coun['location'],
                                  df_coun['country_code']):
            if query in location.lower():
                check = True
                country = location
                country_code = code

    if not check:
        return render(request, 'rest_state.html', {'check': check})

    # Headline figures are read by column position from the matched row.
    temp = df_coun[df_coun['location'] == country]
    date = temp.iloc[0, 7].split()[0]
    total = temp.iloc[0, 4]
    discharged = temp.iloc[0, 6]
    deaths = temp.iloc[0, 5]
    active = total - (discharged + deaths)

    r = requests.get('https://www.trackcorona.live/api/cities')
    df_city = pd.DataFrame(r.json()['data'])

    df_city = df_city[df_city['country_code'] == country_code]
    if len(df_city) > 0:
        write = True
    df_city = df_city.sort_values(by=['confirmed', 'dead'], ascending=False)
    # Fill the gaps through the frame itself. The previous chained
    # ``df_city[col].fillna(..., inplace=True)`` acted on a column taken from
    # a filtered frame, which is not guaranteed to update ``df_city``; any
    # NaN left behind makes the ``int()`` conversions below fail.
    df_city = df_city.fillna({'recovered': 0, 'dead': 0})
    df_city = df_city[:200]

    # Table rows: name, then columns 4, 6 and 5 (by position) as integers.
    rows = [
        [record[0], int(record[4]), int(record[6]), int(record[5])]
        for record in df_city.itertuples(index=False, name=None)
    ]

    top_cities = df_city[:30]

    fig_total = px.bar(
        top_cities,
        x='location',
        y='confirmed',
        hover_data=['confirmed', 'recovered', 'dead'],
        template='plotly_dark',
        title='30 most affected State/City of {}'.format(country))
    fig_total = opy.plot(fig_total, auto_open=False, output_type='div')

    # Centre the map on the country's own coordinates, passed as scalars
    # rather than one-element Series.
    country_row = df_coun[df_coun['country_code'] == country_code]
    country_map = folium.Map(location=[
        country_row['latitude'].iloc[0],
        country_row['longitude'].iloc[0]
    ],
                             tiles='CartoDB dark_matter',
                             zoom_start=4)
    for lat, lon, value, name in zip(df_city['latitude'],
                                     df_city['longitude'],
                                     df_city['confirmed'],
                                     df_city['location']):
        folium.CircleMarker(
            [lat, lon],
            radius=3,
            popup=('<strong>State: </strong>' + name + '<br>'
                   '<strong>Confirmed: </strong>' + str(value)),
            color='red',
            fill_color='red',
            fill_opacity=0.3).add_to(country_map)
    map_html = country_map._repr_html_()

    return render(
        request, 'rest_state.html', {
            'check': check,
            'write': write,
            'Country': country,
            'Date': date,
            'Total': total,
            'Active': active,
            'Discharged': discharged,
            'Deaths': deaths,
            'rows': rows,
            'fig': fig_total,
            'map': map_html
        })
Пример #16
0
def index():
    """Render ``master.html`` with three Plotly charts built from ``df``.

    The charts are:

    1. a bar chart of the number of messages per ``genre``;
    2. a horizontal bar chart of the five columns (taken from column
       position 3 onward) with the largest column sums;
    3. a heatmap of the correlation matrix of those five columns.

    Returns:
        The result of ``render_template`` for ``master.html``, which is
        given the graph element ids and the JSON-encoded graph definitions.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Get top 5 labels and their counts (columns from position 3 onward are
    # summed and the five largest sums are kept)
    labels_df = df.iloc[:, 3:]
    top_5_labels = labels_df.sum().sort_values(ascending=False).head(5)
    top_label_names = list(top_5_labels.index)
    top_label_counts = list(top_5_labels.values)

    # Generate correlation matrix for the top 5 features in the dataset.
    # (A bar figure that used to be built from this matrix was never used
    # and has been dropped; the heatmap below is the only consumer.)
    top_5_corr = df.loc[:, top_label_names].corr()

    # create visuals
    graphs = [
        {
            'data': [
                Bar(
                    x=genre_names,
                    y=genre_counts
                )
            ],

            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        {
            'data': [
                Bar(
                    x=top_label_counts,
                    y=top_label_names,
                    orientation='h'
                )
            ],

            'layout': {
                'title': 'Top 5 message labels in the dataset',
                'yaxis': {
                    'title': "Label"
                },
                'xaxis': {
                    'title': "Count"
                }
            }
        },
        {
            'data': [
                Heatmap(
                    x=top_label_names,
                    y=top_label_names,
                    z=top_5_corr
                )
            ],

            'layout': {
                'title': 'Correlation matrix for the top 5 labels',
                'yaxis': {
                    'title': "Label"
                },
                'xaxis': {
                    'title': "Label"
                }
            }
        }
    ]

    # encode plotly graphs in JSON; one "graph-<i>" id per graph, in order
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def get_value_count_graphs(_data):
    """Show one bar chart of value counts for every column of ``_data``.

    For each column the name is printed, the column's value counts are
    sorted by index, and the resulting bar chart (titled with the
    upper-cased column name) is displayed.
    """
    for column_name in _data.columns:
        print("Currently in", column_name)
        counts_by_value = _data[column_name].value_counts().sort_index()
        chart_title = column_name.upper()
        px.bar(counts_by_value, title=chart_title).show()
Пример #18
0
        "Confirmed": ":,",
        "Deaths": ":,",
        "Recovered": ":,",
        "Country_Region": False,
    },
)
# Drop the left/right/bottom margins of the bubble map (keeping 50 at the
# top) and anchor the color bar's left edge at x=0.
bubble_map.update_layout(margin=dict(l=0, r=0, t=50, b=0),
                         coloraxis_colorbar=dict(xanchor="left", x=0))

# Bar chart of "count" per "condition" from totals_df, using the dark
# template; the hover text formats "count" with thousands separators.
bars_graph = px.bar(
    totals_df,
    x="condition",
    hover_data={"count": ":,"},
    y="count",
    template="plotly_dark",
    title="Total Global Cases",
    labels={
        "condition": "Condition",
        "count": "Count",
        "color": "Condition"
    },
)
# Explicit bar colors; presumably one per condition row in totals_df
# (three colors are given) - confirm against how totals_df is built.
bars_graph.update_traces(marker_color=["#e74c3c", "#8e44ad", "#27ae60"])

app.layout = html.Div(
    style={
        "minHeight": "100vh",
        "backgroundColor": "#111111",
        "color": "white",
        "fontFamily": "Open Sans, sans-serif",
        "x-scroll": "auto",
Пример #19
0
def plot_quantile_returns_bar(mean_ret_by_q,
                              by_group=False,
                              ylim_percentiles=None):
    """
    Plots mean period wise returns for factor quantiles.

    Values are multiplied by ``DECIMAL_TO_BPS`` before plotting and every
    figure is displayed with ``show()``; nothing is returned.

    Parameters
    ----------
    mean_ret_by_q : pd.DataFrame
        DataFrame with quantile, (group) and mean period wise return values.
    by_group : bool
        Disaggregated figures by group. When true, one figure is shown per
        pair of groups (two subplots side by side).
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
    """

    mean_ret_by_q = mean_ret_by_q.copy()
    colors = px.colors.qualitative.Plotly
    # The palette is finite; wrap around instead of raising IndexError when
    # there are more return columns than colors.
    num_colors = len(colors)

    if ylim_percentiles is not None:
        ymin = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[0]) *
                DECIMAL_TO_BPS)
        ymax = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[1]) *
                DECIMAL_TO_BPS)
    else:
        ymin = None
        ymax = None
    if by_group:
        group_keys = sorted(
            mean_ret_by_q.index.get_level_values('group').unique())
        num_group = len(group_keys)
        grouped = mean_ret_by_q.groupby(level='group')
        # Number of figures needed when two groups are shown per figure
        v_spaces = ((num_group - 1) // 2) + 1
        # Show two columns (groups) per figure
        for i in range(v_spaces):
            subplot_titles = group_keys[i * 2:(i + 1) * 2]
            # Always lay out two columns, even if only one group remains
            gf = make_subplots(rows=1,
                               cols=2,
                               y_title='收益率(基点)',
                               subplot_titles=subplot_titles,
                               shared_yaxes=True)
            for j, sc in enumerate(subplot_titles, 1):
                cor = grouped.get_group(sc)
                bar_data = cor.xs(sc, level='group').multiply(DECIMAL_TO_BPS)
                columns = bar_data.columns
                for k, name in enumerate(columns):
                    gf.add_trace(go.Bar(name=name,
                                        legendgroup=sc,
                                        marker_color=colors[k % num_colors],
                                        # Only the first subplot contributes
                                        # legend entries
                                        showlegend=(j == 1),
                                        x=bar_data.index,
                                        y=bar_data[name].values),
                                 row=1,
                                 col=j)
            gf.update_layout(barmode='group')
            gf.update_yaxes(range=[ymin, ymax])
            gf.show()
    else:
        bar_data = mean_ret_by_q.multiply(DECIMAL_TO_BPS)
        # Capture the value columns before the index is turned into a column
        columns = bar_data.columns
        bar_data.reset_index(inplace=True)

        fig = px.bar(bar_data, x='factor_quantile', y=columns, barmode='group')

        fig.update_layout(title_text="因子分位数分组期间平均收益率")
        fig.update_xaxes(title_text='分位数')
        fig.update_yaxes(range=[ymin, ymax], title_text='收益率(基点)')
        fig.update_layout(
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
        fig.show()
Пример #20
0
import pandas as pd
import plotly.express as px
# Load the data set from data.csv (path is relative to the working directory)
df = pd.read_csv("data.csv")
# Bar chart of the "InternetUsers" column for each "Country"
fig = px.bar(df, x="Country", y="InternetUsers")
fig.show()
Пример #21
0
    ))

# ------------------------------------------------------ Charts with Plotly
st.subheader(f"Breakdown by minute between {hour}:00 and {hour + 1}:00")
# Keep only the rows whose date_time falls within the selected hour
filtered_df = vehicles_df[(vehicles_df['date_time'].dt.hour >= hour)
                          & (vehicles_df['date_time'].dt.hour < (hour + 1))]

# Count rows per minute: 60 bins over [0, 60); [0] keeps the counts and
# discards the bin edges returned by np.histogram
hist = np.histogram(filtered_df['date_time'].dt.minute, bins=60,
                    range=(0, 60))[0]
# One row per minute of the hour with its count, ready for the bar chart
chart_data = pd.DataFrame({'minute': range(60), 'crashes': hist})
# Plotly bar chart of the per-minute counts
fig = px.bar(chart_data,
             x="minute",
             y="crashes",
             hover_data=['minute', 'crashes'],
             height=400)
st.write(fig)

# ------------------------------------------------------ Final Chart
st.header("Top five dangerous streets by affected type")
# NOTE(review): the label below is spelled 'Afected' - confirm whether the
# user-facing text should read 'Affected'.
selection = st.selectbox(label='Afected type of people',
                         options=['Pedestrians', 'Cyclists', 'Motorists'])

# This could be done better than what was shown in the course project

if selection == 'Pedestrians':
    st.write(vehicles_df.query("injured_pedestrians >=1") \
                 [['on_street_name', 'injured_pedestrians']].nlargest(5, 'injured_pedestrians').dropna(how='any'))
elif selection == 'Cyclists':
Пример #22
0
def main():
    """Run the Streamlit NLP demo application.

    Draws the sidebar (logo, header and one checkbox per demo) and, for each
    ticked checkbox, renders that demo's inputs and a "Run" button: named
    entity recognition, text summarization, keyword extraction, name
    anonymization, an N-gram bar plot and a word cloud.

    Every input widget gets an explicit ``key``: several demos reuse
    identical widget labels, and without distinct keys Streamlit reports a
    duplicate-widget error as soon as two such demos are enabled together.
    """

    image = Image.open('images/wordcloud.png')

    st.sidebar.image(image, width=200)
    st.sidebar.header("NLP demos")
    st.sidebar.text("Select an option and see it in action!")

    st.title("Natural Language Processing demos")
    st.markdown("""
    	#### An NLP app for demonstration purposes: analyze your text!
    	

    	""")

    # Named Entity Recognition

    if st.sidebar.checkbox("Named Entity Recognition"):

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="ner_lang")
        lang_model = ('en_core_web_sm'
                      if lang_options == 'EN' else 'pt_core_news_sm')

        message = st.text_area("Enter text inside the box...",
                               key="ner_text")

        if st.button("Run", key="ner_run"):
            with st.spinner('Wait for it...'):
                entity_result = entity_analyzer(message, lang_model)
            # st.json draws the element itself; its return value is not
            # displayable text, so it must not be wrapped in st.success.
            st.json(entity_result)

    # Summarization

    if st.sidebar.checkbox("Text Summarization"):
        st.subheader("Summarize Your Text")

        message = st.text_area(
            "Enter text (EN only for now) inside the box...",
            key="summary_text")

        ratio_value = st.slider(
            'Select a ratio (%) that determines the proportion of the number of sentences of the original text to be chosen for the summary',
            0, 100, 10, key="summary_ratio")

        if st.button("Run", key="summary_run"):
            with st.spinner('Wait for it...'):
                # The slider is a percentage; summarize receives a fraction
                summary_result = summarize(message, ratio=ratio_value / 100)
            st.success(summary_result)

    # Automated Keyword Extraction

    if st.sidebar.checkbox("Automated Keyword Extraction"):
        st.subheader("Extract Keywords")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="keywords_lang")

        if lang_options == 'EN':
            stop_words = en_stopwords
            lang_model = 'en_core_web_sm'
        else:
            lang_model = 'pt_core_news_sm'
            stop_words = pt_stopwords

        message = st.text_area("Enter text inside the box...",
                               key="keywords_text")

        if st.button("Run", key="keywords_run"):
            with st.spinner('Wait for it...'):

                # Transliterate accented characters to plain ASCII
                text = unidecode.unidecode(message)

                # NOTE(review): this demo passes the 'EN'/'PT' code to
                # clean_string while the N-gram and word cloud demos pass
                # their lang_model value - confirm which one clean_string
                # expects.
                corpus = clean_string(text, lang_options)

                tr4w = TextRank4Keyword()
                tr4w.set_stopwords(stopwords=stop_words, lang_model=lang_model)
                tr4w.analyze(corpus,
                             window_size=4,
                             lower=False,
                             lang_model=lang_model)

                st.success('Keywords: ' +
                           (', '.join(sorted(tr4w.get_keywords(10)))))

    # Data Anonymization (erasing names)

    if st.sidebar.checkbox("Anonymize Personal Data"):
        st.subheader("Anonymize Your Data: Hiding Names")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="anonymize_lang")
        # Anything other than 'PT' falls back to the English model
        lang_model = ('pt_core_news_sm'
                      if lang_options == 'PT' else 'en_core_web_sm')

        message = st.text_area("Enter text inside the box...",
                               key="anonymize_text")

        if st.button("Run", key="anonymize_run"):
            with st.spinner('Wait for it...'):
                names_cleaned_result = sanitize_names(message, lang_model)
                st.success(names_cleaned_result)

    # N-grams

    if st.sidebar.checkbox("N-Grams Barplot"):
        st.subheader("Visualize an N-grams barplot")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="ngrams_lang")
        # Anything other than 'PT' falls back to English
        lang_model = 'portuguese' if lang_options == 'PT' else 'english'

        # The selected option (1, 2 or 3) is the N-gram size itself
        ngrams = st.selectbox("Choose N for N-grams (1, 2 or 3)", [1, 2, 3],
                              key="ngrams_size")

        message = st.text_area("Let's analyze and get some visuals...",
                               key="ngrams_text")

        if st.button("Run", key="ngrams_run"):
            with st.spinner('Wait for it...'):
                # Transliterate accented characters to plain ASCII
                text = unidecode.unidecode(message)

                corpus = [clean_string(text, lang_model)]

                top3_words = get_top_n_words(corpus, ngrams, n=20)
                top3_df = pd.DataFrame(top3_words)
                top3_df.columns = ["N-gram", "Freq"]
                fig = px.bar(top3_df, x='N-gram', y='Freq')

                st.plotly_chart(fig)

    # Wordcloud

    if st.sidebar.checkbox("Wordcloud"):
        st.subheader("Visualize a wordcloud")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="wordcloud_lang")

        if lang_options == 'EN':
            lang_model = 'en_core_web_sm'
            stop_words = en_stopwords
        else:
            lang_model = 'pt_core_news_sm'
            stop_words = pt_stopwords

        message = st.text_area("Let's analyze and get some visuals...",
                               key="wordcloud_text")

        if st.button("Run", key="wordcloud_run"):
            with st.spinner('Wait for it...'):
                # Transliterate accented characters to plain ASCII
                text = unidecode.unidecode(message)

                corpus = [clean_string(text, lang_model)]

                # Word cloud
                wordcloud = WordCloud(background_color='white',
                                      stopwords=stop_words,
                                      max_words=100,
                                      max_font_size=50,
                                      random_state=42).generate(str(corpus))
                fig = plt.figure(1)
                plt.imshow(wordcloud, interpolation="bilinear")
                plt.axis('off')
                # Hand the figure over explicitly instead of relying on
                # matplotlib's implicit global figure.
                st.pyplot(fig)
Пример #23
0
# - - - - - - - - - - - - - - #

# - - - - - - - - - - - - - - #
# Loading data
# - - - - - - - - - - - - - - #
# Sample data: one (fruit, amount, city) record per row
df = pd.DataFrame(
    [
        ("Apples", 4, "SF"),
        ("Oranges", 1, "SF"),
        ("Bananas", 2, "SF"),
        ("Apples", 2, "Montreal"),
        ("Oranges", 4, "Montreal"),
        ("Bananas", 5, "Montreal"),
    ],
    columns=["Fruit", "Amount", "City"],
)

# - - - - - - - - - - - - - - #
# Defining figures
# - - - - - - - - - - - - - - #

fig = px.bar(df, x="Fruit", y="Amount", color="City", barmode="group")

# - - - - - - - - - - - - - - #
# App layout
# - - - - - - - - - - - - - - #

app.layout = html.Div(children=[
    html.H1(children='Hello Dash EPF'),

    html.Div(children='''
        Dash: A web application framework for Python.
    '''),

    dcc.Graph(
        id='example-graph',
        figure=fig
# Cumulative COVID-19 cases in India: line chart with markers
fig = go.Figure(data=[
    go.Scatter(
        x=dbd_India['Date'],
        y=dbd_India['Total Cases'],
        mode='lines+markers',
        name='Total Cases',
    )
]).update_layout(
    title_text='Trend of Coronavirus Cases in India (Cumulative cases)',
    plot_bgcolor='rgb(230, 230, 230)',
)
fig.show()

# Daily new COVID-19 cases in India: bar chart

import plotly.express as px

fig = px.bar(
    dbd_India,
    x="Date",
    y="New Cases",
    barmode='group',
    height=400,
).update_layout(
    title_text='Coronavirus Cases in India on daily basis',
    plot_bgcolor='rgb(230, 230, 230)',
)

fig.show()
"""# Part 2: Is the trend similar to Italy/ S.Korea/ Wuhan?

India has already crossed 562 cases. It is very important to contain the situation in the coming 21 days. The number of coronavirus patients started doubling after these countries hit the 100 mark and then started increasing almost exponentially.

## 2.1 Cumulative cases in India, Italy, S.Korea, and Wuhan
"""

# import plotly.express as px
fig = px.bar(dbd_India,
             x="Date",
             y="Total Cases",
Пример #25
0
def update_by_genre_and_role(district):
    """Build a bar chart of "TIPO PERSONA" against "SEXO" for one district.

    Args:
        district: Value compared for equality with the "DISTRITO" column of
            the module-level ``df`` to select the rows to plot.

    Returns:
        The Plotly Express bar figure for the selected rows, colored by
        "TIPO PERSONA".
    """
    district_rows = df[df["DISTRITO"] == district]
    return px.bar(district_rows,
                  x="SEXO",
                  y="TIPO PERSONA",
                  color="TIPO PERSONA")
Пример #26
0
    xaxis = dict(zeroline = False)
)

# Basic styling with ggplot: scatter plot of GDP against infant mortality
# rate, with pink fill, dark red outline and point size 3.
# NOTE(review): the ggplot calls here are bare expressions, so they
# presumably only render in a notebook/REPL - confirm the intended runtime.

ggplot(data, aes(x="GDP", y="Infant Mortality Rate"))+geom_point(fill="pink", color="darkred", size=3)

# How does GDP change over time? Filled area chart, one line group per
# country/region.

fig = px.area(data_frame = data, x = "Year", y = "GDP", line_group="Country/Region")
fig.show();

# How does the GDP of different countries change over time? Line plot with
# points, grouped by country/region.

ggplot(data, aes(x= "Year", y = "GDP", group="Country/Region")) + geom_line() + geom_point()

# Which region produces the highest CO2 emissions? Bar chart of CO2
# emissions per region.

fig = px.bar(data_frame = data, x = "Region", y = "CO2 Emissions")
fig.show()

# How do Europe and Africa compare in their member countries' birth rates?
# Box plot of birth rate showing all regions.

fig = px.box(data_frame = data, x = "Region", y = "Birth Rate")
fig.show()

# Histogram of internet usage

fig = px.histogram(data_frame = data, x = "Internet Usage")
fig.show()
Пример #27
0
# Stylesheets handed to Dash: a hosted base stylesheet plus a local override
external_stylesheets = [
    'https://codepen.io/chriddyp/pen/bWLwgP.css', "./static/da_style.css"
]

app2 = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# check_same_thread=False lets the connection be used from threads other
# than the one that created it.
conn = _sqlite3.connect('./r5.db', check_same_thread=False)
# Per-subject marks and per-student result status, loaded once at import time
data = pd.read_sql('Select roll_no,subject,marks from scorekids', conn)
data1 = pd.read_sql('Select roll_no,status from result', conn)
total_rows = len(data)

# Grouped bar chart of marks per roll number, one bar per subject.
# (An earlier make_subplots() figure assigned to `fig` was overwritten
# immediately and never used, so it is no longer created.)
fig = px.bar(data, x="roll_no", y="marks", color="subject", barmode="group")
# NOTE(review): values='roll_no' sizes each slice by the sum of roll numbers
# per status rather than by the number of students - confirm this is intended.
fig1 = px.pie(data1, values='roll_no', names='status')
app2.layout = html.Div(children=[
    html.H1('Scoresheet', style={
        'textAlign': 'center',
    }),
    dcc.Graph(id='bar-graph', figure=fig),
    html.Hr(),
    html.H1('Pie Chart', style={
        'textAlign': 'center',
        'paddingTop': 20
    }),
    dcc.Graph(id='pie-chart', figure=fig1),
],
                       style={'backgroundColor': 'white'})
# Copy of df2 without the 'value' column
df3 = df2.drop(columns=['value'])

# Preview of the first rows.
# NOTE(review): as a bare expression this presumably only displays in a
# notebook - confirm the intended runtime.
df3.head()

# Sum of the 'value' column over all rows of df2
Total = df2['value'].sum()
print (Total)

"""The interactive bar plot below shows the different crimes that occurred in the year 2011. It lets me see which London borough had the most crimes in the year and interactively inspect which crimes occurred (by major or minor category). Based on this, I would like to analyse the 'month' column and see how the type of crime that occurred differs by month."""

import plotly.express as px
# NOTE(review): the gapminder sample data is loaded but not used in the
# lines visible here - confirm whether a later cell needs `data`.
data = px.data.gapminder()

# NOTE(review): df3_major is not used in the lines visible here.
df3_major = df3['crime_occured']
# Bar chart of 'value' per borough from df2, colored by 'crime_occured',
# with the major/minor category shown on hover.
# NOTE(review): the labels mapping renames a 'Month' column, which is not
# among the plotted columns, so it appears to have no effect - confirm.
fig = px.bar(df2, x='borough', y='value',
             hover_data=['major_category', 'minor_category'], color='crime_occured',
             labels={'Month':'Sum of Crimes Occured'}, height=400)
#fig.set_title("The frequency and type of crime occured in different boroughs in London")
fig.show()

# Same steps repeated for df3 (the frame without the 'value' column)
import plotly.express as px
data = px.data.gapminder()

df3_major = df3['crime_occured']
# Bar chart from df3 with 'crime_occured' used both as the y value and as
# the color.
fig = px.bar(df3, x='borough', y='crime_occured',
             hover_data=['major_category', 'minor_category'], color='crime_occured',
             labels={'Month':'Sum of Crimes Occured'}, height=400)
#fig.set_title("The frequency and type of crime occured in different boroughs in London")
fig.show()

# Scatter plot for minor_category
Пример #29
0
# Count how often each tag occurs; element 4 of every tweet record is
# iterated as that tweet's collection of tags (an empty collection simply
# contributes nothing).
for tweets in original_tweets:
    for tag in tweets[4]:
        tags_dictionary[tag] = tags_dictionary.get(tag, 0) + 1

# Sort the (count, tag) pairs in descending order of count
sorted_temp = sorted(
    ((value, key) for (key, value) in tags_dictionary.items()),
    reverse=True)

# Plot all tags that Trump has used in 2019 in his original tweets and their corresponding frequency
sorted_tags = pd.DataFrame(sorted_temp, columns=['counts', 'tags'])
print(sorted_tags)
tags_counts = px.bar(sorted_tags, x='tags', y='counts')
tags_counts.show()

# Plot the top 20 tags Trump has used in 2019. The slice previously stopped
# at row 19 and therefore showed only 19 tags. The variable keeps its
# historical name even though it holds twenty rows.
top_ten_tags = px.bar(sorted_tags[0:20], x='tags', y='counts')
top_ten_tags.show()

# In[7]:
'''The above bar charts show which hashtags Trump liked to use in 2019. Unexpectedly, Trump used #MAGA only 72 times,
which is a small number compared to the number of his original tweets.

According to the first bar chart, Trump tended to use different hashtags among his tweets because most of the hashtags
only appear once in his tweets. 

The second bar chart shows the top 20 most frequently used hashtags by Trump. Unsurprisingly, political campaign hashtags, 
including #MAGA, #KAG2020, #2020, and #KAG are used most frequently. Hashtags related to contemporary news
        #     'text': "Field of Study of Doctorate Recipients by Sex and Selected Years",
        #     'y':0.94,
        #     'x':0.5,
        #     'xanchor': 'center',
        #     'yanchor': 'top'})
        st.plotly_chart(fig_1)

st.sidebar.markdown(
    "### Earned Doctorate Recipients by Sex and Race/Ethnicity")
# The offered option must be the exact string tested below. It used to be
# 'Barplot' while the check compared against 'Bar plot', so the chart could
# never be drawn.
# NOTE(review): this selectbox and the checkbox below share key='2' - confirm
# that the Streamlit version in use accepts one key on two widgets.
select2 = st.sidebar.selectbox('Visualization type', ['Bar plot'], key='2')
data2 = pd.DataFrame(data)
# The "Hide" checkbox starts ticked, so the chart is hidden until unticked
if not st.sidebar.checkbox("Hide", True, key='2'):
    if select2 == 'Bar plot':
        # Bar chart of "Number" per "Race_and_Ethnicity", colored by "Sex"
        fig_2 = px.bar(data2,
                       x="Race_and_Ethnicity",
                       y="Number",
                       color="Sex",
                       height=500)
        st.plotly_chart(fig_2)

st.sidebar.markdown("### Earned Doctorate Recipients by Race and Ethnicity")
select3 = st.sidebar.selectbox('Visualization type', ['Bar plot', 'Pie chart'],
                               key='3')
race_count = data.groupby(["Race_and_Ethnicity"])['Number'].agg('sum')
race_count = pd.DataFrame({
    'Race_and_Ethnicity': race_count.index,
    'Number': race_count.values
})
if not st.sidebar.checkbox("Hide", True, key='3'):
    st.markdown("### Earned Doctorate Recipients by Race/Ethnicity")
    if select3 == 'Bar plot':