# Minimal Plotly Express script: create a bar figure with width=400 and show it.
import plotly.express as px

# No data frame or x/y arguments are passed; only the figure width is set.
fig = px.bar(width=400)
fig.show()
else: date_options[weeks] = date_item.strftime("%b %d") weeks = weeks + 1 #Pick a default date inprogress = data_US['date'][data_US.index[-3]] first_date = data_US['date'][data_US.index[0]] last_date = data_US['date'][data_US.index[-1]] #Set up the charts import plotly.express as px import plotly.graph_objects as go #fig = go.Figure() # or any Plotly Express function e.g. px.bar(...) fig = px.choropleth() bar1 = px.bar() bar2 = px.bar() colorscale = [ "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9", "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be", "#2171b5", "#1361a9", "#08519c", "#0b4083", "#08306b" ] # or px.colors.sequential.Plasma #Style the charts plotcfg = {'displayModeBar': False} #fig1.update_xaxes(rangeslider_visible=True) fig.update_layout(height=500, margin=dict(l=0, r=0, b=0, t=0, pad=0), coloraxis_colorbar=dict(title="Z-score"), plot_bgcolor='rgb(255,255,255)')
# Minimal Plotly Express script: create a bar figure with default arguments and show it.
import plotly.express as px

# px.bar() is called without any data or layout arguments.
fig = px.bar()
fig.show()
2016: '2016', 2017: '2017', 2018: '2018', 2019: '2019' } c = list(df["ZIP CODE"].astype(str)) nomi = pgeocode.Nominatim('us') df[["latitude", "longitude"]] = nomi.query_postal_code(c)[["latitude", "longitude"]] df['detail'] = df["ADDRESS 1"] + " " + df['ZIP CODE'].astype( str) + ' payment: ' + df['payments'].astype(str) fig1 = px.bar(year_fund, x='year', y='funded_amount', color="purpose", barmode="group") fig1.update_layout(title='Total Funded Amount based on Need in every years') fig2 = px.scatter_mapbox(df, lat="latitude", lon="longitude", color='BUILDING CLASS CATEGORY', text=df['detail'], zoom=9.5, mapbox_style='open-street-map') fig2.update_layout(title='New Yokers Payment based on Building Class Category') app.layout = html.Div([ dcc.Graph(id='graph1', figure=fig1),
]].sample(n=1).iat[0, 0]) st.sidebar.subheader('Number of Tweets by Sentiment') show_hide1 = st.sidebar.checkbox('Show Vizualization', True, key='1') viz_select = st.sidebar.selectbox('Vizualization Type', ['Histogram', 'Pie Chart']) sentiment_count = data['airline_sentiment'].value_counts() sentiment_count = pd.DataFrame({ 'Sentiment': sentiment_count.index, 'Tweets': sentiment_count.values }) if show_hide1: st.subheader('Number of Tweets by Sentiment') if viz_select == 'Histogram': fig = px.bar(sentiment_count, x='Sentiment', y='Tweets', height=500) st.plotly_chart(fig) elif viz_select == 'Pie Chart': fig = px.pie(sentiment_count, values='Tweets', names='Sentiment') st.plotly_chart(fig) st.sidebar.subheader('When and Where are Users Tweeting from?') show_hide2 = st.sidebar.checkbox('Show Vizualization', True, key='2') select_hour = st.sidebar.slider('Hour of Day', 0, 23) select_raw_data = st.sidebar.checkbox('Show Raw Data', False) selected_data = data[data['tweet_created'].dt.hour == select_hour] if show_hide2: st.subheader('When and Where are Users Tweeting from?') st.markdown('%i tweets between %i:00 and %i:00' % (len(selected_data), select_hour, (select_hour + 1) % 24))
radius=100, extruded=True, pickable=True, elevation_scale=4, elevation_range=[0, 1000], ), ], )) st.subheader("Breakdown by minute between %i:00 and %i:00" % (hour, (hour + 1) % 24)) filtered = data[ (data['date/time'].dt.hour >= hour) & (data['date/time'].dt.hour < (hour + 1)) ] hist = np.histogram(filtered['date/time'].dt.minute, bins=60, range=(0, 60))[0] chart_data = pd.DataFrame({"minute": range(60), "crashes": hist}) fig = px.bar(chart_data, x='minute', y='crashes', hover_data=['minute', 'crashes'], height=400) st.write(fig) st.header("Top 5 dangerous streets by affected class") select = st.selectbox('Affected class', ['Pedestrians', 'Cyclists', 'Motorists']) if select == 'Pedestrians': st.write(original_data.query("injured_pedestrians >= 1")[["on_street_name", "injured_pedestrians"]].sort_values(by=['injured_pedestrians'], ascending=False).dropna(how="any")[:5]) elif select == 'Cyclists': st.write(original_data.query("injured_cyclists >= 1")[["on_street_name", "injured_cyclists"]].sort_values(by=['injured_cyclists'], ascending=False).dropna(how="any")[:5]) else: st.write(original_data.query("injured_motorists >= 1")[["on_street_name", "injured_motorists"]].sort_values(by=['injured_motorists'], ascending=False).dropna(how="any")[:5])
def _show_traffic_table():
    """Show the rows returned by ``view_all_info()`` as a three-column table."""
    query_view = view_all_info()
    clean_db = pd.DataFrame(
        query_view, columns=["LOCATION", "STREET NAME", "TYPE OF TRAFFIC"])
    st.dataframe(clean_db)


def _load_and_preview_data():
    """Show the dashboard blurb, load the data and offer the two preview toggles.

    Returns the frame produced by ``load_data(100000)``.
    """
    st.markdown(
        "This application is a streamlit dashboared that can "
        "be used to analyse motor vhicle collision in nyc")
    data = load_data(100000)
    if st.checkbox("data head", False):
        st.write(data.head())
        st.info("Successfully Loaded Data Frame Head")
    if st.checkbox("Show Raw Data", False):
        st.subheader('Raw data')
        st.info("Note That The Dataset is very large")
        st.write(data)
    return data


def _render_home():
    """Home page: title, background, current timestamp and the traffic table."""
    st.title("Welcome To the HomePage")
    homepagebck()
    st.subheader("Latest Traffic updates")
    date = datetime.datetime.now()
    st.subheader(date)
    _show_traffic_table()


def _render_update_task():
    """"UPDATE" task: add a traffic entry or drop entries, then show the table."""
    st.subheader("Add current traffic update here and see that table")
    create_todaytraffic()
    borough = st.text_input("enter the borough")
    on_street_name = st.text_input("enter the on_street_name")
    # Named traffic_type so the builtin ``type`` is not shadowed.
    traffic_type = st.text_input("enter the type")
    if st.button("UPDATE"):
        add_trafficdata(borough, on_street_name, traffic_type)
        _show_traffic_table()
    if st.button("Drop all from today"):
        deleteNull()
        _show_traffic_table()


def _render_dataset_task():
    """"Data Set" task: heading plus the shared load-and-preview widgets."""
    st.subheader("Data Set from Uber about Newyork City")
    _load_and_preview_data()


def _render_visualise_task():
    """"Visualise" task: injury map, hourly hexagon map, per-minute bars, top 5 streets."""
    data = _load_and_preview_data()
    # Keep the unfiltered frame for the street ranking; ``data`` is narrowed
    # to a single hour further down.
    original_data = data

    st.header("Where are he most people injured in NYC?")
    # ``injured_people`` must stay a local of this function: the query string
    # below refers to it through ``@injured_people``.
    injured_people = st.slider(
        "Number of persons injured in vehicle collisions", 0, 19)
    st.map(
        data.query("injured_persons >= @injured_people")[[
            "latitude", "longitude"
        ]].dropna(how="any"))

    st.header("How many collisions occured during a given time of day?")
    hour = st.slider("Hour to look at", 0, 23)
    data = data[data['crash_date_crash_time'].dt.hour == hour]
    st.markdown("Vehicle Collisions between %i:00 and %i:00" %
                (hour, (hour + 1) % 24))
    # NOTE(review): np.average yields NaN if the selection is empty or the
    # coordinate columns contain NaN -- confirm load_data() drops such rows.
    midpoint = (np.average(data['latitude']), np.average(data['longitude']))
    st.write(
        pdk.Deck(
            map_style="mapbox://styles/mapbox/light-v9",
            initial_view_state={
                "latitude": midpoint[0],
                "longitude": midpoint[1],
                "zoom": 11,
                "pitch": 50,
            },
            layers=[
                pdk.Layer(
                    "HexagonLayer",
                    data=data[[
                        'crash_date_crash_time', 'latitude', 'longitude'
                    ]],
                    get_position=['longitude', 'latitude'],
                    radius=100,
                    extruded=True,
                    pickable=True,
                    elevation_range=[0, 1000],
                ),
            ],
        ))

    st.subheader("Breakdown by minute between %i:00 and %i:00" %
                 (hour, (hour + 1) % 24))
    # ``data`` already holds only rows whose hour equals ``hour``, so the
    # former ">= hour and < hour + 1" re-filter selected exactly these rows.
    hist = np.histogram(data['crash_date_crash_time'].dt.minute,
                        bins=60,
                        range=(0, 60))[0]
    chart_data = pd.DataFrame({'minute': range(60), 'crashes': hist})
    fig = px.bar(chart_data,
                 x='minute',
                 y='crashes',
                 hover_data=['minute', 'crashes'],
                 height=400)
    st.write(fig)

    st.header("Top 5 dangerous streets by affected type")
    st.markdown("based on visualisation and data interpretation")
    select = st.selectbox('Affected type of people',
                          ['Pedestrians', 'Cyclists', 'Motorists'])
    # Any choice other than the first two falls back to motorists, as the
    # original if/elif/else did.
    injured_column = {
        'Pedestrians': 'injured_pedestrians',
        'Cyclists': 'injured_cyclists',
    }.get(select, 'injured_motorists')
    st.write(
        original_data.query("%s >= 1" % injured_column)[[
            "on_street_name", injured_column
        ]].sort_values(by=[injured_column],
                       ascending=False).dropna(how="any")[:5])


def _render_login():
    """Login page: sidebar credentials, then the task selected by the user."""
    st.header("Login Section")
    loginpagebck()
    username = st.sidebar.text_input("Username")
    password = st.sidebar.text_input("Password", type='password')
    if not st.sidebar.checkbox("Login"):
        return

    create_usertable()
    if not login_user(username, password):
        st.warning("Incorrect Username/Password")
        st.markdown("Enter correct Username/Password and try again")
        st.info("Go in sidebar to Signup for free")
        return

    st.success("Logged in as {}".format(username))
    task = st.selectbox("Task", [
        "UPDATE",
        "Data Set",
        "Visualise",
    ])
    if task == "UPDATE":
        _render_update_task()
    elif task == "Data Set":
        _render_dataset_task()
    elif task == "Visualise":
        _render_visualise_task()


def _render_signup():
    """Signup page: collect a username/password and store the new account."""
    signuppagebck()
    st.subheader("Create New Account")
    new_user = st.text_input("Username")
    new_password = st.text_input("Password", type="password")
    if st.button("Signup"):
        create_usertable()
        # NOTE(review): the password is handed over as typed; confirm that
        # add_userdata() hashes it before storing.
        add_userdata(new_user, new_password)
        st.success("You Have Succesfully Created a Valid Account")
        st.info("Go in to Login Menu to login ")


def main():
    """Entry point of the Streamlit app: sidebar menu plus page dispatch.

    Draws the sidebar menu ("Home", "Login", "Signup"), injects the sidebar
    background CSS and renders the page matching the selected entry.
    """
    menu = ['Home', 'Login', 'Signup']
    st.sidebar.title("MENU")
    choice = st.sidebar.selectbox("Menu", menu)
    st.markdown(
        """ <style> .sidebar .sidebar-content { background: url(https://images.pexels.com/photos/1111316/pexels-photo-1111316.jpeg?auto=compress&cs=tinysrgb&dpr=1&w=500); color: white; } </style> """,
        unsafe_allow_html=True,
    )
    if choice == "Home":
        _render_home()
    elif choice == "Login":
        _render_login()
    elif choice == "Signup":
        _render_signup()
if st.checkbox("Show Raw Data"): st.dataframe(df_listing.head()) if st.checkbox('Show Data Statistics'): st.dataframe(df_listing.describe()) geodata = load_geojson_data() st.header("Listing Locations") df_nbh = df_listing.groupby('neighbourhood_cleansed').mean() st.markdown('We are interested in the popular areas of SF, from the chart we could observe that *Seacliff* and *Maria* are with higher average price. If the trip is budgeted, we could look into other neighbourhoods like *Crocker Amazon*.') bar = px.bar(df_nbh['price'].sort_values(ascending=False).reset_index(), x="neighbourhood_cleansed", y='price', title="Average price of each neighbourhood in SF", color = 'neighbourhood_cleansed') bar.update_xaxes(title="Neighbourhood") bar.update_yaxes(title="Price") st.plotly_chart(bar) #print(df_nbh) stats_nbh = geodata.merge(df_nbh, left_on = 'neighbourhood', right_on = 'neighbourhood_cleansed') print(stats_nbh['price']) # midpoint = (np.average(df_listing['longitude']), np.average(df_listing['latitude'])) stats = geodata.merge(df_listing, 'left', 'neighbourhood') room_types = list(df_listing['room_type'].unique())
def main():
    """Load player data through Spark, aggregate it by continent and plot it.

    Steps, in order:
    1. Read the ``players2`` and ``countryContinent`` collections and attach a
       continent to every player by joining nationality to country.
    2. Encode continents as "1", "2", ... and body types as 3/2/1/0 so they
       are numeric in the exported ``cleaned.csv``.
    3. Show bar charts of average weight per continent and of player counts
       per continent and body type, and print the average age per club.
    4. Read ``cleaned.csv`` back and show an annotated heatmap of the
       correlations between its numeric columns.
    """
    players2 = spark.read.format("mongo").options(collection='players2').load()
    cc = spark.read.format("mongo").options(
        collection='countryContinent').load()
    cc = cc.select('country', 'continent')

    players_a = players2.select('age', 'height_cm', 'weight_kg', 'nationality',
                                'club', 'overall', 'potential', 'body_type',
                                'pace', 'physic', 'movement_agility',
                                'power_stamina', 'mentality_aggression')

    # Attach the continent of each nationality, then drop the join keys.
    nationality_continent = players2.select('nationality').distinct()
    nationality_continent = nationality_continent.join(
        cc, nationality_continent['nationality'] == cc['country']).drop(
            'country')
    players_a = players_a.join(
        nationality_continent, players_a['nationality'] ==
        nationality_continent['nationality']).drop('nationality')

    # Continent name -> "1", "2", ... in the order collect() returns them.
    continent_rows = players_a.select('continent').distinct().collect()
    continent_codes = {
        row[0]: str(code)
        for code, row in enumerate(continent_rows, start=1)
    }

    players_a = players_a.withColumn('body_type', \
        functions.when((functions.col('body_type') == 'Lean'), 3) \
        .when((functions.col('body_type') == 'Normal'), 2) \
        .when((functions.col('body_type') == 'Stocky'), 1) \
        .otherwise(0))
    # The second argument is kept from the original call; the mapping comes
    # from the dict passed as first argument.
    players_a = players_a.replace(continent_codes, 1, 'continent')
    players_a.show(3)

    players_b = players_a.select(players_a['weight_kg'],
                                 players_a['continent'], players_a['club'])
    players_b = players_b.groupBy('continent').agg(
        functions.avg('weight_kg').alias('average weight')).orderBy(
            'continent')

    players_c = players_a.select(players_a['continent'],
                                 players_a['body_type'], players_a['club'])
    players_c = players_c.groupBy('continent', 'body_type').agg(
        functions.count('body_type').alias('number of players')).orderBy(
            'continent')
    # Decode body types for display; code 0 becomes 'null' and is filtered out.
    players_c = players_c.withColumn('body_type', \
        functions.when((functions.col('body_type') == 3), 'Lean') \
        .when((functions.col('body_type') == 2), 'Normal') \
        .when((functions.col('body_type') == 1), 'Stocky') \
        .otherwise('null'))
    players_c = players_c.filter(players_c['body_type'] != 'null')

    players_d = players_a.select('age', 'club').groupBy('club').agg(
        functions.avg('age').alias('average_age')).orderBy('average_age')
    players_d.show(truncate=False)

    players_a = players_a.toPandas()
    players_a.to_csv('cleaned.csv', index=False)

    # Turn the continent codes back into names for the two bar charts.
    code_to_continent = {code: name for name, code in continent_codes.items()}
    players_b = players_b.replace(code_to_continent, 1, 'continent')
    p = players_b.toPandas()
    fig = px.bar(p,
                 x="average weight",
                 y="continent",
                 color="continent",
                 orientation='h',
                 height=400)
    fig.show()

    players_c = players_c.replace(code_to_continent, 1, 'continent')
    players_c.show()
    players_c = players_c.toPandas()
    fig = px.bar(players_c,
                 x="number of players",
                 y="continent",
                 color="body_type",
                 orientation='h',
                 height=400)
    fig.show()

    df = pd.read_csv('cleaned.csv')
    # NOTE(review): read_csv already consumes the header line, so this slice
    # drops the first data row. Kept as found -- confirm it is intended.
    df = df[1:]

    # Correlate numeric columns only: the exported frame still carries the
    # string column 'club', which DataFrame.corr() rejects on pandas >= 2.0.
    corr_frame = df.select_dtypes(include='number').corr()
    # Derive the axis labels from the matrix itself so every label sits on the
    # row/column it describes, whatever the column order of the CSV is.
    display_names = {
        'height_cm': 'height',
        'weight_kg': 'weight',
        'body_type': 'body type',
        'overall': 'overall rating',
        'physic': 'physique',
        'movement_agility': 'movement agility',
        'power_stamina': 'stamina',
        'mentality_aggression': 'aggression',
    }
    labels = [display_names.get(col, col) for col in corr_frame.columns]
    corr = corr_frame.to_numpy().round(2)

    fig = ff.create_annotated_heatmap(z=corr,
                                      annotation_text=corr,
                                      x=labels,
                                      y=labels,
                                      hoverongaps=False,
                                      colorscale='Viridis',
                                      hoverinfo='z')
    fig.show()
ascending=False) ##### creación de graficas def figures_to_html(figs, filename="20paisescontagios.html"): dashboard = open(filename, 'w') dashboard.write("<html><head></head><body>" + "\n") for fig in figs: inner_html = fig.to_html().split('<body>')[1].split('</body>')[0] dashboard.write(inner_html) dashboard.write("</body></html>" + "\n") fig1 = px.bar(df_order_total_cases.head(20), x='location', y='total_cases', barmode='relative') figures_to_html([fig1]) def figures_to_html(figs, filename="20paisesmuertes.html"): dashboard = open(filename, 'w') dashboard.write("<html><head></head><body>" + "\n") for fig in figs: inner_html = fig.to_html().split('<body>')[1].split('</body>')[0] dashboard.write(inner_html) dashboard.write("</body></html>" + "\n") fig1 = px.bar(df_order_total_death.head(20),
# ref: https://dash.plotly.com/introduction # ref: https://plotly.com/python/ # ref: bootstrap-crash-course # ref: https://getbootstrap.com/docs/4.4/getting-started/introduction/ import dash import dash_core_components as dcc import dash_html_components as html import plotly.graph_objects as go import plotly.express as px import dash_bootstrap_components as dbc app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) data_canada = px.data.gapminder().query("country == 'Canada'") fig_1 = px.bar(data_frame=data_canada, x='year', y='pop', template='ggplot2') fig_2 = px.bar(data_frame=data_canada, x='year', y='pop', template='plotly_dark') navbar = dbc.NavbarSimple( children=[ dbc.NavItem(dbc.NavLink("Page 1", href="#")), dbc.DropdownMenu( children=[ dbc.DropdownMenuItem("More pages", header=True), dbc.DropdownMenuItem("Page 2", href="#"), dbc.DropdownMenuItem("Page 3", href="#"), ], nav=True,
st.write("Hank: That really makes sense. Thank you a lot bro! I think I can simply choose CS:GO as the start of my career since I can win tons of money from these tournaments.") st.write("Sean: Not so hurry boi. If you are interested in money, then let’s talk about the sexiest. Look at this chart, it shows the actual prize pool each game has every year. Dota2 is the most generous and they gave out over 30M dollars!") xaxis_config = { 'title':'Game' } prize_df = get_data('prize')[['Name','Total Prize Pool','Year']] st.markdown('---') st.header("Let the Money Talk") values = st.selectbox("Select Year ",years) selected_df = prize_df.where(prize_df['Year']==str(values)) selected_df = selected_df.dropna() f = px.bar(selected_df, x="Name", y = 'Total Prize Pool',title = 'Total Prize Pool',color='Name') f.update_layout( title=title_config, yaxis={ "title":'Total Prize', 'rangemode':"tozero" }, xaxis=xaxis_config, ) st.plotly_chart(f) # Show the radar chart if show: st.write("Hank: Wow! No wonder why professional Dota gamers like Puppey are so rich! But, wait, this is really confusing. Dota2 has the least tournaments but it has the largest rewards? Should I be a professional Dota2 player then?") st.write("Sean: Well, that also means it can be quite competitive. To really make the best decision, we need a bigger picture! Let’s look at this radar chart I made for you! So this radar chart has five aspects, Player growth, Average earning, Professional rate(It’s basically how hard it is to become a professional player), Total prize each year, and Total tournaments. The larger the number, the better for you.") st.write("Hank: Wow. This chart really helps me a lot. But I still need to spend more time considering my option. ")
def bar_cases_provinces():
    """Return a bar chart of CASES per DATE, coloured by PROVINCE.

    The chart is built from the ``df`` frame defined outside this function
    and uses the "plotly_white" layout template.
    """
    chart = px.bar(df, color="PROVINCE", y="CASES", x="DATE")
    chart.update_layout(template="plotly_white")
    return chart
#Figures created for the dashboard fig1 = go.Figure(data=go.Scattergeo( lon=df_final['lon'], lat=df_final['lat'], mode='markers', text=df_final['content'], marker_color=df_final['size'], )) fig1.update_layout(title="Shootings Reported per Location", geo_scope='south america', height=800) fig2 = px.bar(counts[:10], x=counts[:10].index, y=counts[:10], labels={ 'x': 'Neighborhoods', 'y': "Shooting's count" }, color=counts[:10].index) fig2.update_layout(title="Shootings reported by neighborhoods in RJ") #Prepare the data for the 3rd figure def prepare_df(df_x, name_nb='Belford Roxo'): #Function created to return a dataframe ready for plotting. #2 entry variables: original dataframe and neighborhood to filter by df_fig = df_x.copy() df_fig = df_fig.loc[df_fig['content'] == name_nb] df_fig['date'] = df_fig['date'].str[:11] df_fig = df_fig.sort_values(by=['date']) list_vals = [x for x in range(df_fig.shape[0])
# Search terms that stand for the API's 'UK' and 'United States' entries.
_UK_ALIASES = frozenset({'uk', 'united kingdom', 'england', 'britain'})
_US_ALIASES = frozenset(
    {'us', 'usa', 'america', 'united states', 'united states of america'})


def _resolve_country(df_coun, query):
    """Map a lower-cased search string to ``(location, country_code)`` or None."""
    if not query:
        # An empty string is a substring of every name; treat it as no match.
        return None
    if query in _UK_ALIASES:
        matches = df_coun[df_coun['location'] == 'UK']
    elif query in _US_ALIASES:
        matches = df_coun[df_coun['location'] == 'United States']
    else:
        names = df_coun['location'].str.lower()
        # Prefer an exact name so that e.g. a short name is not overridden by
        # a longer one that merely contains it.
        matches = df_coun[names == query]
        if matches.empty:
            # Substring fallback; the last hit wins, as in the former loop.
            matches = df_coun[names.str.contains(query, regex=False,
                                                 na=False)].tail(1)
    if matches.empty:
        return None
    row = matches.iloc[0]
    return row['location'], row['country_code']


def country_detail(request):
    """Render 'rest_state.html' with the figures of the requested country.

    Reads the ``country`` field of the POST data, looks the country up in the
    countries endpoint and, when found, adds its totals, a table and bar chart
    of its cities and a folium map to the template context.

    Context when the country is not found: ``{'check': False}``.
    Context otherwise: ``check``, ``write`` (True when city rows exist),
    ``Country``, ``Date``, ``Total``, ``Active``, ``Discharged``, ``Deaths``,
    ``rows``, ``fig`` and ``map``.
    """
    # .get() avoids an exception when the form field is missing.
    query = request.POST.get('country', '').strip().lower()

    # A timeout keeps the view from hanging if the API does not answer.
    r = requests.get('https://www.trackcorona.live/api/countries', timeout=10)
    df_coun = pd.DataFrame(r.json()['data'])

    resolved = _resolve_country(df_coun, query)
    if resolved is None:
        return render(request, 'rest_state.html', {'check': False})
    country, country_code = resolved

    # NOTE(review): columns are addressed by position, as before; presumably
    # 4/5/6/7 are confirmed/dead/recovered/updated -- verify against the API.
    temp = df_coun[df_coun['location'] == country]
    date = temp.iloc[0, 7].split()[0]
    total = temp.iloc[0, 4]
    discharged = temp.iloc[0, 6]
    deaths = temp.iloc[0, 5]
    active = total - (discharged + deaths)

    r = requests.get('https://www.trackcorona.live/api/cities', timeout=10)
    df_city = pd.DataFrame(r.json()['data'])
    df_city = df_city[df_city['country_code'] == country_code]

    # Defaults so the template context is complete when there are no cities.
    write = False
    rows = []
    fig_total = ''
    if len(df_city) > 0:
        write = True
        df_city = df_city.sort_values(by=['confirmed', 'dead'],
                                      ascending=False)
        # Assign the filled frame instead of filling a column in place, which
        # is not guaranteed to modify a filtered frame.
        df_city = df_city.fillna({'recovered': 0, 'dead': 0})
        df_city = df_city[:200]
        # Same positional columns as the country row above.
        rows = [[city[0], int(city[4]), int(city[6]), int(city[5])]
                for city in df_city.itertuples(index=False, name=None)]
        fig_total = px.bar(
            df_city[:30],
            x='location',
            y='confirmed',
            hover_data=['confirmed', 'recovered', 'dead'],
            template='plotly_dark',
            title='30 most affected State/City of {}'.format(country))
        fig_total = opy.plot(fig_total, auto_open=False, output_type='div')

    # Centre the map on the country; pass scalars rather than pandas Series.
    country_row = df_coun[df_coun['country_code'] == country_code]
    folium_map = folium.Map(location=[
        country_row['latitude'].iloc[0], country_row['longitude'].iloc[0]
    ],
                            tiles='CartoDB dark_matter',
                            zoom_start=4)
    # Cities without coordinates cannot be placed on the map; skip them.
    located = df_city.dropna(subset=['latitude', 'longitude'])
    for lat, lon, value, name in zip(located['latitude'],
                                     located['longitude'],
                                     located['confirmed'],
                                     located['location']):
        folium.CircleMarker(
            [lat, lon],
            radius=3,
            popup=('<strong>State: </strong>' + name + '<br>'
                   '<strong>Confirmed: </strong>' + str(value)),
            color='red',
            fill_color='red',
            fill_opacity=0.3).add_to(folium_map)
    map_html = folium_map._repr_html_()

    return render(
        request, 'rest_state.html', {
            'check': True,
            'write': write,
            'Country': country,
            'Date': date,
            'Total': total,
            'Active': active,
            'Discharged': discharged,
            'Deaths': deaths,
            'rows': rows,
            'fig': fig_total,
            'map': map_html
        })
def index():
    """Render 'master.html' with three Plotly graphs built from ``df``.

    The graphs are: message counts per genre, the five label columns with the
    highest totals, and the correlation matrix of those five labels. They are
    passed to the template as JSON (``graphJSON``) together with one element
    id per graph (``ids``).
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Top 5 labels and their counts; labels are the columns from position 3 on.
    labels_df = df.iloc[:, 3:]
    top_5_labels = labels_df.sum().sort_values(ascending=False).head(5)
    top_label_names = list(top_5_labels.index)
    top_label_counts = list(top_5_labels.values)

    # Correlation matrix of the top 5 labels (the heatmap below plots it; no
    # separate figure object is needed).
    top_5_corr = df.loc[:, top_label_names].corr()

    # create visuals
    graphs = [
        {
            'data': [
                Bar(
                    x=genre_names,
                    y=genre_counts
                )
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        {
            'data': [
                Bar(
                    x=top_label_counts,
                    y=top_label_names,
                    orientation='h'
                )
            ],
            'layout': {
                'title': 'Top 5 message labels in the dataset',
                'yaxis': {
                    'title': "Label"
                },
                'xaxis': {
                    'title': "Count"
                }
            }
        },
        {
            'data': [
                Heatmap(
                    x=top_label_names,
                    y=top_label_names,
                    z=top_5_corr
                )
            ],
            'layout': {
                'title': 'Correlation matrix for the top 5 labels',
                'yaxis': {
                    'title': "Label"
                },
                'xaxis': {
                    'title': "Label"
                }
            }
        }
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def get_value_count_graphs(_data):
    """Show a bar chart of the value counts of every column in ``_data``.

    For each column its name is printed, its value counts are ordered by
    value (``sort_index``) and plotted with the upper-cased column name as
    the chart title.
    """
    for column_name in _data.columns:
        print("Currently in", column_name)
        counts_by_value = _data[column_name].value_counts().sort_index()
        chart = px.bar(counts_by_value, title=column_name.upper())
        chart.show()
"Confirmed": ":,", "Deaths": ":,", "Recovered": ":,", "Country_Region": False, }, ) bubble_map.update_layout(margin=dict(l=0, r=0, t=50, b=0), coloraxis_colorbar=dict(xanchor="left", x=0)) bars_graph = px.bar( totals_df, x="condition", hover_data={"count": ":,"}, y="count", template="plotly_dark", title="Total Global Cases", labels={ "condition": "Condition", "count": "Count", "color": "Condition" }, ) bars_graph.update_traces(marker_color=["#e74c3c", "#8e44ad", "#27ae60"]) app.layout = html.Div( style={ "minHeight": "100vh", "backgroundColor": "#111111", "color": "white", "fontFamily": "Open Sans, sans-serif", "x-scroll": "auto",
def plot_quantile_returns_bar(mean_ret_by_q,
                              by_group=False,
                              ylim_percentiles=None):
    """
    Plots mean period wise returns for factor quantiles.

    The input is copied, scaled by ``DECIMAL_TO_BPS`` and shown as grouped
    bar charts; nothing is returned.

    Parameters
    ----------
    mean_ret_by_q : pd.DataFrame
        DataFrame with quantile, (group) and mean period wise return values.
    by_group : bool
        Disaggregated figures by group. When True the index must carry a
        'group' level; one figure is shown per pair of groups.
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
    """
    mean_ret_by_q = mean_ret_by_q.copy()
    colors = px.colors.qualitative.Plotly

    if ylim_percentiles is not None:
        ymin = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[0]) *
                DECIMAL_TO_BPS)
        ymax = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[1]) *
                DECIMAL_TO_BPS)
    else:
        ymin = None
        ymax = None

    if by_group:
        group_keys = sorted(
            mean_ret_by_q.index.get_level_values('group').unique())
        num_group = len(group_keys)
        grouped = mean_ret_by_q.groupby(level='group')
        v_spaces = ((num_group - 1) // 2) + 1
        # Show two groups (columns) per figure.
        for i in range(v_spaces):
            subplot_titles = group_keys[i * 2:(i + 1) * 2]
            # Always lay out two columns, even when only one group is left.
            gf = make_subplots(rows=1,
                               cols=2,
                               y_title='收益率(基点)',
                               subplot_titles=subplot_titles,
                               shared_yaxes=True)
            for j, sc in enumerate(subplot_titles, 1):
                cor = grouped.get_group(sc)
                bar_data = cor.xs(sc, level='group').multiply(DECIMAL_TO_BPS)
                columns = bar_data.columns
                for k, name in enumerate(columns):
                    gf.add_trace(
                        go.Bar(
                            name=name,
                            legendgroup=sc,
                            # Wrap around the palette so more return columns
                            # than palette colours do not raise IndexError.
                            marker_color=colors[k % len(colors)],
                            # Legend entries are listed once, for the first
                            # subplot only.
                            showlegend=(j == 1),
                            x=bar_data.index,
                            y=bar_data[name].values),
                        row=1,
                        col=j)
            gf.update_layout(barmode='group')
            gf.update_yaxes(range=[ymin, ymax])
            gf.show()
    else:
        bar_data = mean_ret_by_q.multiply(DECIMAL_TO_BPS)
        # Capture the value columns before the index becomes a column too.
        columns = bar_data.columns
        bar_data.reset_index(inplace=True)
        fig = px.bar(bar_data,
                     x='factor_quantile',
                     y=columns,
                     barmode='group')
        fig.update_layout(title_text="因子分位数分组期间平均收益率")
        fig.update_xaxes(title_text='分位数')
        fig.update_yaxes(range=[ymin, ymax], title_text='收益率(基点)')
        fig.update_layout(
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
        fig.show()
# Read data.csv and show a bar chart of its "InternetUsers" column by "Country".
import pandas as pd
import plotly.express as px

# The path is relative, so the file is looked up in the current working directory.
df = pd.read_csv("data.csv")
# "Country" goes on the x axis, "InternetUsers" on the y axis.
fig = px.bar(df, x="Country", y="InternetUsers")
fig.show()
)) # ------------------------------------------------------ Charts with Plotly st.subheader(f"Breakdown by minute between {hour}:00 and {hour + 1}:00") # filtering data in DF through hour filter filtered_df = vehicles_df[(vehicles_df['date_time'].dt.hour >= hour) & (vehicles_df['date_time'].dt.hour < (hour + 1))] # creating new DF only with histogram data to load into plotly bar hist = np.histogram(filtered_df['date_time'].dt.minute, bins=60, range=(0, 60))[0] chart_data = pd.DataFrame({'minute': range(60), 'crashes': hist}) # Plotly Chart fig = px.bar(chart_data, x="minute", y="crashes", hover_data=['minute', 'crashes'], height=400) st.write(fig) # ------------------------------------------------------ Final Chart st.header("Top five dangerous streets by affected type") selection = st.selectbox(label='Afected type of people', options=['Pedestrians', 'Cyclists', 'Motorists']) # this could be done better than showed in course project if selection == 'Pedestrians': st.write(vehicles_df.query("injured_pedestrians >=1") \ [['on_street_name', 'injured_pedestrians']].nlargest(5, 'injured_pedestrians').dropna(how='any')) elif selection == 'Cyclists':
def main():
    """Render the Streamlit page with the NLP demos.

    The sidebar offers one checkbox per demo (named entity recognition,
    summarization, keyword extraction, name anonymization, N-gram bar plot,
    word cloud).  Every ticked demo renders its own language selector, text
    area and "Run" button in the main area.

    Several demos share the same widget labels, so every widget gets an
    explicit ``key``; without one, Streamlit derives the widget identity
    from its type, label and arguments, and ticking two demos at the same
    time makes the identical widgets collide.
    """
    image = Image.open('images/wordcloud.png')
    st.sidebar.image(image, width=200)
    st.sidebar.header("NLP demos")
    st.sidebar.text("Select an option and see it in action!")

    st.title("Natural Language Processing demos")
    st.markdown("""
    #### An NLP app for demonstration purposes: analyze your text!
    """)

    # Named Entity Recognition
    if st.sidebar.checkbox("Named Entity Recognition"):
        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="ner_lang")
        if lang_options == 'EN':
            lang_model = 'en_core_web_sm'
        else:
            lang_model = 'pt_core_news_sm'

        message = st.text_area("Enter text inside the box...",
                               key="ner_text")

        if st.button("Run", key="ner_run"):
            with st.spinner('Wait for it...'):
                entity_result = entity_analyzer(message, lang_model)
            # st.json renders the result itself; its return value is not a
            # message and must not be forwarded to st.success.
            st.json(entity_result)

    # Summarization
    if st.sidebar.checkbox("Text Summarization"):
        st.subheader("Summarize Your Text")

        message = st.text_area(
            "Enter text (EN only for now) inside the box...",
            key="summary_text")

        ratio_value = st.slider(
            'Select a ratio (%) that determines the proportion of the number of sentences of the original text to be chosen for the summary',
            0,
            100,
            10,
            key="summary_ratio")

        if st.button("Run", key="summary_run"):
            with st.spinner('Wait for it...'):
                # The slider is in percent; summarize() receives a fraction.
                summary_result = summarize(message, ratio=ratio_value / 100)
            st.success(summary_result)

    # Automated Keyword Extraction
    if st.sidebar.checkbox("Automated Keyword Extraction"):
        st.subheader("Extract Keywords")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="keywords_lang")
        if lang_options == 'EN':
            stop_words = en_stopwords
            lang_model = 'en_core_web_sm'
        else:
            lang_model = 'pt_core_news_sm'
            stop_words = pt_stopwords

        message = st.text_area("Enter text inside the box...",
                               key="keywords_text")

        if st.button("Run", key="keywords_run"):
            with st.spinner('Wait for it...'):
                # Transliterate accented characters to plain ASCII.
                text = unidecode.unidecode(message)
                # NOTE(review): this section passes the 'EN'/'PT' code to
                # clean_string while the other sections pass a model or
                # language name -- confirm which one clean_string expects.
                corpus = clean_string(text, lang_options)

                tr4w = TextRank4Keyword()
                tr4w.set_stopwords(stopwords=stop_words,
                                   lang_model=lang_model)
                tr4w.analyze(corpus,
                             window_size=4,
                             lower=False,
                             lang_model=lang_model)
                keywords = sorted(tr4w.get_keywords(10))
            st.success('Keywords: ' + ', '.join(keywords))

    # Data Anonymization (erasing names)
    if st.sidebar.checkbox("Anonymize Personal Data"):
        st.subheader("Anonymize Your Data: Hiding Names")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="anonymize_lang")
        if lang_options == 'PT':
            lang_model = 'pt_core_news_sm'
        else:
            lang_model = 'en_core_web_sm'

        message = st.text_area("Enter text inside the box...",
                               key="anonymize_text")

        if st.button("Run", key="anonymize_run"):
            with st.spinner('Wait for it...'):
                names_cleaned_result = sanitize_names(message, lang_model)
            st.success(names_cleaned_result)

    # N-grams
    if st.sidebar.checkbox("N-Grams Barplot"):
        st.subheader("Visualize an N-grams barplot")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="ngrams_lang")
        if lang_options == 'PT':
            lang_model = 'portuguese'
        else:
            lang_model = 'english'

        # The selected option already is the N to use.
        ngrams = st.selectbox("Choose N for N-grams (1, 2 or 3)", [1, 2, 3],
                              key="ngrams_n")

        message = st.text_area("Let's analyze and get some visuals...",
                               key="ngrams_text")

        if st.button("Run", key="ngrams_run"):
            with st.spinner('Wait for it...'):
                text = unidecode.unidecode(message)
                corpus = [clean_string(text, lang_model)]

                top3_words = get_top_n_words(corpus, ngrams, n=20)
                top3_df = pd.DataFrame(top3_words)
                top3_df.columns = ["N-gram", "Freq"]
                fig = px.bar(top3_df, x='N-gram', y='Freq')
            st.plotly_chart(fig)

    # Wordcloud
    if st.sidebar.checkbox("Wordcloud"):
        st.subheader("Visualize a wordcloud")

        lang_options = st.selectbox("Choose language (EN/PT)", ['EN', 'PT'],
                                    key="wordcloud_lang")
        if lang_options == 'EN':
            lang_model = 'en_core_web_sm'
            stop_words = en_stopwords
        else:
            lang_model = 'pt_core_news_sm'
            stop_words = pt_stopwords

        message = st.text_area("Let's analyze and get some visuals...",
                               key="wordcloud_text")

        if st.button("Run", key="wordcloud_run"):
            with st.spinner('Wait for it...'):
                text = unidecode.unidecode(message)
                corpus = [clean_string(text, lang_model)]

                # Word cloud
                wordcloud = WordCloud(background_color='white',
                                      stopwords=stop_words,
                                      max_words=100,
                                      max_font_size=50,
                                      random_state=42).generate(str(corpus))
                fig = plt.figure(1)
                plt.imshow(wordcloud, interpolation="bilinear")
                plt.axis('off')
            # Pass the figure explicitly; st.pyplot() without a figure
            # relies on matplotlib's global state and is deprecated.
            st.pyplot(fig)
# - - - - - - - - - - - - - - # # - - - - - - - - - - - - - - # # Loading data # - - - - - - - - - - - - - - # df = pd.DataFrame({ "Fruit": ["Apples", "Oranges", "Bananas", "Apples", "Oranges", "Bananas"], "Amount": [4, 1, 2, 2, 4, 5], "City": ["SF", "SF", "SF", "Montreal", "Montreal", "Montreal"] }) # - - - - - - - - - - - - - - # # Defining figures # - - - - - - - - - - - - - - # fig = px.bar(df, x="Fruit", y="Amount", color="City", barmode="group") # - - - - - - - - - - - - - - # # App layout # - - - - - - - - - - - - - - # app.layout = html.Div(children=[ html.H1(children='Hello Dash EPF'), html.Div(children=''' Dash: A web application framework for Python. '''), dcc.Graph( id='example-graph', figure=fig
# Rise of COVID-19 cases in India fig = go.Figure() fig.add_trace( go.Scatter(x=dbd_India['Date'], y=dbd_India['Total Cases'], mode='lines+markers', name='Total Cases')) fig.update_layout( title_text='Trend of Coronavirus Cases in India (Cumulative cases)', plot_bgcolor='rgb(230, 230, 230)') fig.show() # New COVID-19 cases reported daily in India import plotly.express as px fig = px.bar(dbd_India, x="Date", y="New Cases", barmode='group', height=400) fig.update_layout(title_text='Coronavirus Cases in India on daily basis', plot_bgcolor='rgb(230, 230, 230)') fig.show() """# Part 2: Is the trend similar to Italy/ S.Korea/ Wuhan? India has already crossed 562 cases. It is very important to contain the situation in the coming 21 days.The numbers of coronavirus patients starting doubling after these countries hit the 100 mark and almost starting increasing exponentially. ## 2.1 Cumulative cases in India, Italy, S.Korea, and Wuhan """ # import plotly.express as px fig = px.bar(dbd_India, x="Date", y="Total Cases",
def update_by_genre_and_role(district):
    """Build the bar chart for one district.

    Filters the module-level ``df`` to the rows whose "DISTRITO" column
    equals ``district`` and plots "SEXO" on x against "TIPO PERSONA" on y,
    colored by "TIPO PERSONA".

    Parameters
    ----------
    district
        Value compared for equality against the "DISTRITO" column.

    Returns
    -------
    The figure created by ``px.bar`` for the filtered rows.
    """
    district_rows = df[df["DISTRITO"] == district]
    return px.bar(district_rows,
                  x="SEXO",
                  y="TIPO PERSONA",
                  color="TIPO PERSONA")
xaxis = dict(zeroline = False) ) # Let's learn such basic beautifying with ggplot ggplot(data, aes(x="GDP", y="Infant Mortality Rate"))+geom_point(fill="pink", color="darkred", size=3) # How GDP of the World Changing over time? Filled line graph fig = px.area(data_frame = data, x = "Year", y = "GDP", line_group="Country/Region") fig.show(); # How GDP of different countries are changing over time? ggplot(data, aes(x= "Year", y = "GDP", group="Country/Region")) + geom_line() + geom_point() # Which Region is producing the highest CO2 Emission? Bar graph fig = px.bar(data_frame = data, x = "Region", y = "CO2 Emissions") fig.show() # Compare Europe and Africa in terms the member countries' birth rates to share your insights? Create a Boxplot of birthrate and show all the regions on the plot fig = px.box(data_frame = data, x = "Region", y = "Birth Rate") fig.show() # Create a histogram of internet usage fig = px.histogram(data_frame = data, x = "Internet Usage") fig.show()
# Stylesheets handed to Dash: a hosted base stylesheet plus a local one.
external_stylesheets = [
    'https://codepen.io/chriddyp/pen/bWLwgP.css', "./static/da_style.css"
]

# server = Flask(__name__)
app2 = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# check_same_thread=False lets the connection be used from threads other
# than the one that created it.
conn = _sqlite3.connect('./r5.db', check_same_thread=False)
data = pd.read_sql('Select roll_no,subject,marks from scorekids', conn)
data1 = pd.read_sql('Select roll_no,status from result', conn)
#print(data)

# Number of rows read from the scorekids table.
total_rows = len(data.axes[0])
#print(total_rows)

# Grouped bar chart of marks per roll number, one bar color per subject.
# (A previous make_subplots() figure assigned to `fig` was overwritten
# right away and has been dropped.)
fig = px.bar(data, x="roll_no", y="marks", color="subject", barmode="group")
# Pie chart with one slice per status, sized by the roll_no values.
fig1 = px.pie(data1, values='roll_no', names='status')

app2.layout = html.Div(children=[
    html.H1('Scoresheet', style={
        'textAlign': 'center',
    }),
    dcc.Graph(id='bar-graph', figure=fig),
    html.Hr(),
    html.H1('Pie Chart', style={
        'textAlign': 'center',
        'paddingTop': 20
    }),
    dcc.Graph(id='pie-chart', figure=fig1),
],
                       style={'backgroundColor': 'white'})
# Copy of df2 without its 'value' column.
df3 = df2.drop(columns=['value'])
# Bare expression: its result is only displayed in an interactive session.
df3.head()

# Sum of the 'value' column over all rows of df2.
Total = df2['value'].sum()
print (Total)

"""The below interactive bar plot is showing me the different crimes that occured in the year 2011. The bar plot allows me to see which borough in London had the most crimes in the year, giving me the interaction of seeing which crimes occured (major or minor). Looking at this, I would like to analyse the column 'month' and see how it differs in the type of crime that occured."""

import plotly.express as px
# NOTE(review): `data` and `df3_major` are not used by the statements
# visible here -- confirm nothing later in the file relies on them.
data = px.data.gapminder()
df3_major = df3['crime_occured']
# Bar chart of 'value' per 'borough' from df2, colored by 'crime_occured',
# with the major/minor category added to the hover data.
# NOTE(review): `labels` is keyed on 'Month', which is not one of the
# columns passed to this call -- presumably the label never applies; verify.
fig = px.bar(df2, x='borough', y='value',
             hover_data=['major_category', 'minor_category'], color='crime_occured',
             labels={'Month':'Sum of Crimes Occured'}, height=400)
#fig.set_title("The frequency and type of crime occured in different boroughs in London")
fig.show()

import plotly.express as px
data = px.data.gapminder()
df3_major = df3['crime_occured']
# Same chart built from df3 (no 'value' column), with 'crime_occured' used
# both as the y value and as the color.
fig = px.bar(df3, x='borough', y='crime_occured',
             hover_data=['major_category', 'minor_category'], color='crime_occured',
             labels={'Month':'Sum of Crimes Occured'}, height=400)
#fig.set_title("The frequency and type of crime occured in different boroughs in London")
fig.show()

# Scatter plot for minor_category
for tweets in original_tweets: if len(tweets[4]) != 0: for tag in tweets[4]: if tag in tags_dictionary.keys(): tags_dictionary[tag] += 1 else: tags_dictionary[tag] = 1 #Sort the tags_dictionary according to the count of tags sorted_temp = sorted((value, key) for (key, value) in tags_dictionary.items()) sorted_temp.reverse() #Plot all tags that Trump has used in 2019 in his original tweets and their corresponding frequency sorted_tags = pd.DataFrame(sorted_temp, columns=['counts', 'tags']) print(sorted_tags) tags_counts = px.bar(sorted_tags, x='tags', y='counts') tags_counts.show() #print the top 20 tags Trump has used in 2019 top_ten_tags = px.bar(sorted_tags[0:19], x='tags', y='counts') top_ten_tags.show() # In[7]: '''The above bar charts show what hashtags Trump liked to use in 2019. Unexpectedly, Trump only used #MAGA for 72 times, which is a small amount compared to his original tweets. According to the first bar chart, Trump tended to use different hashtags among his tweets because most of the hashtags only appear once in his tweets. The second bar chart shows the top 20 most frequently used hashtags by Trump. Unsurprisingly, political campaign hashtags, including #MAGA, #KAG2020, #2020, and #KAG are used most frequently. Hashtags related to contemporary news
# 'text': "Field of Study of Doctorate Recipients by Sex and Selected Years", # 'y':0.94, # 'x':0.5, # 'xanchor': 'center', # 'yanchor': 'top'}) st.plotly_chart(fig_1) st.sidebar.markdown( "### Earned Doctorate Recipients by Sex and Race/Ethnicity") select2 = st.sidebar.selectbox('Visualization type', ['Barplot'], key='2') data2 = pd.DataFrame(data) if not st.sidebar.checkbox("Hide", True, key='2'): if select2 == 'Bar plot': fig_2 = px.bar(data2, x="Race_and_Ethnicity", y="Number", color="Sex", height=500) st.plotly_chart(fig_2) st.sidebar.markdown("### Earned Doctorate Recipients by Race and Ethnicity") select3 = st.sidebar.selectbox('Visualization type', ['Bar plot', 'Pie chart'], key='3') race_count = data.groupby(["Race_and_Ethnicity"])['Number'].agg('sum') race_count = pd.DataFrame({ 'Race_and_Ethnicity': race_count.index, 'Number': race_count.values }) if not st.sidebar.checkbox("Hide", True, key='3'): st.markdown("### Earned Doctorate Recipients by Race/Ethnicity") if select3 == 'Bar plot':