def line_plots(data: pd.DataFrame, lang: NullTranslations) -> None:
    """Render line plots, both general (whole country) and regional, of ``data``.

    ``data`` is usually the resulting DataFrame from the website of the
    Protezione civile (Italian civil-protection COVID-19 feed).

    Args:
        data: Raw per-region daily records; must contain a "data" (date)
            column and the region-name column whose key is localized below.
        lang: gettext translations used both for UI labels and for the
            localized column names (e.g. the region-name column).
    """
    _ = lang.gettext
    # Group data by date to get nation-wide totals, then derive the
    # positive-tests ratio on the aggregated frame.
    general = calculate_positive_tests_ratio(
        data.groupby("data", as_index=False).sum(), lang)
    st.title(_("COVID-19 in Italy - Temporal trend"))
    st.markdown("### " + _("14-day cases per 100.000:"))
    # Get today (most recent date present in the data)
    today = general["data"].sort_values(ascending=False).iloc[0]
    # Filter for most recent 14 days and write calculation.
    # NOTE(review): local name has a typo ("positves") — harmless, local only.
    # NOTE(review): the column key _("new_positive") is itself translated;
    # confirm the translation catalog maps it to the actual column name.
    fourteen_day_new_positves = general[today - datetime.timedelta(
        days=14) < general["data"]][_("new_positive")]
    # float(f"...:.2f") rounds the incidence to 2 decimals before display.
    st.write(
        float(
            f"{fourteen_day_new_positves.sum() * 100000 / ITALIAN_POPULATION:.2f}"
        ))
    # Indicator chooser (index=6 picks a default indicator from get_features)
    st.markdown(_("What indicator would you like to visualise?"))
    features = get_features(general)
    feature = st.selectbox(label=_("Choose..."),
                           options=features,
                           format_func=formatter,
                           index=6)
    # Add checkbox for diff with most recent data for an indicator
    diff = st.checkbox(label=_("Difference with previous datapoint"))
    st.markdown(
        _("By checking the above box, the indicator will be replaced by the difference of its value between two consecutive days. This helps in untangling cumulative data such as deaths and total tests."
          ))
    if diff:
        general = diff_over_previous_datapoint(general, "data", feature)
    # Choose log scale or linear, defines what feature to use.
    # Log scale requires strictly positive values, hence the filter.
    general_choice = st.radio(label=_("Scale"),
                              options=[_("linear"), _("logarithmic")])
    if general_choice == _("logarithmic"):
        general = general[general[feature] > 0]
        general_scale = alt.Scale(type="log")
    else:
        general_scale = alt.Scale(type="linear")
    st.markdown(("## " + _("General data")))
    # Average calculation (rolling mean over a user-chosen window) if needed
    is_general_average = st.checkbox(label=_("Average over days"),
                                     key="avg1",
                                     value=True)
    if is_general_average:
        avg_days = st.slider(
            label=_("Days to average over"),
            min_value=1,
            max_value=21,
            value=7,
            key="slider1",
        )
        general_average = average_over_days(general[[feature, "data"]],
                                            categorical_columns=["data"],
                                            avg_days=avg_days)
        general = general_average
    general_chart = generate_global_chart(general, feature, general_scale,
                                          _("Month and day"))
    st.altair_chart(general_chart)
    # NOTE(review): if averaging shifted/dropped the latest date this lookup
    # could raise IndexError — confirm average_over_days keeps `today`.
    todays_latest = general[general["data"] == today][feature].iloc[0]
    st.markdown(
        _("Latest data on ") + f"**{formatter(feature).lower()}**: " +
        f"{todays_latest:.2f}")
    st.markdown(("## " + _("Situation in different regions")))
    # Get list of regions and select the ones of interest
    region_options = data[_(
        "denominazione_regione")].sort_values().unique().tolist()
    regions = st.multiselect(
        label=_("Regions"),
        options=region_options,
        default=["Lombardia", "Veneto", "Campania", "Lazio"],
    )
    # Filter regions in selection
    selected_regions = data[data[_("denominazione_regione")].isin(regions)]
    if selected_regions.empty:
        st.warning(_("No region selected!"))
    else:
        # Positive-test percentage is a derived column, so it must be
        # recomputed per region rather than read from the raw data.
        if feature == _("positivi_per_tampone_%"):
            selected_regions = (selected_regions.groupby([
                _("denominazione_regione")
            ]).apply(lambda group: calculate_positive_tests_ratio(group, lang)
                     ).sort_values(by="data", ascending=True).reset_index(
                         level=0, drop=True).reset_index(drop=True))
        # Same per-group treatment for the day-over-day difference.
        if diff:
            selected_regions = (selected_regions.groupby([
                _("denominazione_regione")
            ]).apply(lambda group: diff_over_previous_datapoint(
                group, "data", feature)).sort_values(
                    by="data", ascending=True).reset_index(
                        level=0, drop=True).reset_index(drop=True))
        regional_choice = st.radio(label=_("Regional Scale"),
                                   options=[_("linear"), _("logarithmic")])
        if regional_choice == _("logarithmic"):
            selected_regions = selected_regions[selected_regions[feature] > 0]
            regional_scale = alt.Scale(type="log")
        else:
            regional_scale = alt.Scale(type="linear")
        is_regional_average = st.checkbox(label=_("Average over days"),
                                          key="avg2",
                                          value=True)
        if is_regional_average:
            avg_days = st.slider(
                label=_("Days to average over"),
                min_value=1,
                max_value=21,
                value=7,
                key="slider2",
            )
            # Average each region independently, then flatten the group index.
            regional_average = (selected_regions.groupby(
                [_("denominazione_regione")],
                as_index=False).apply(lambda group: average_over_days(
                    group[[feature, "data", _("denominazione_regione")]],
                    ["data", _("denominazione_regione")],
                    avg_days,
                )).reset_index(level=0, drop=True).reset_index(drop=True))
            regional_average_chart = generate_regional_chart(
                regional_average,
                feature,
                regional_scale,
                x_title=_("Month and day"),
                color_title=_("Region"),
            )
            st.altair_chart(regional_average_chart)
        else:
            regional_chart = generate_regional_chart(
                selected_regions,
                feature,
                regional_scale,
                x_title=_("Month and day"),
                color_title=_("Region"),
            )
            st.altair_chart(regional_chart)
data = pd.read_csv(DATA_URL, nrows=nrows, parse_dates=[['CRASH DATE', 'CRASH TIME']]) data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True) lowercase = lambda x: str(x).lower() data.rename(lowercase, axis='columns', inplace=True) data.rename(columns={'crash date_crash time': 'date/time'}, inplace=True) data.columns = [column.replace(" ", "_") for column in data.columns] return data data = load_data(100000) original_data = data st.header("Where are the most numebr of persons injured in NYC?") injured_people = st.slider("Number of persons injured in vehicle collisions", 0, 19) st.map( data.query('number_of_persons_injured >= @injured_people')[[ 'latitude', 'longitude' ]].dropna(how="any")) st.header("How many collisions occur during a given time of day?") # hour = st.selectbox("Hour to look at", range(1,24), 1) hour = st.slider("Hour to look at", 0, 23) data = data[data['date/time'].dt.hour == hour] st.markdown("Collisions between %i:00 and %i:00" % (hour, (hour + 1) % 24)) #initialize the map with correct points midpoint = (np.average(data['latitude']), np.average(data['longitude'])) st.write( pdk.Deck(
# parsing dates and making a year and month col df['last_review'] = pd.to_datetime(df['last_review'], format="%Y-%m-%d") df['year'] = df['last_review'].dt.year.astype("Int64") df['month'] = df['last_review'].dt.month.astype("Int64") df['day'] = df['last_review'].dt.day.astype("Int64") return df.dropna().sample(frac=fraction) df = load_data(fraction=0.1) st.header('Data') n_groups = list(df.neighbourhood_group.unique()) neighbourhood_group = st.multiselect(label="Select Neighbourhood Groups", options=n_groups, default=n_groups) (min_price, max_price) = st.slider("Price Range", min_value=0, max_value=df.price.max(), value=(0,300)) st.write(f"Min Price: {min_price}, Max Price: {max_price}") df = df.query("neighbourhood_group == @neighbourhood_group and @min_price < price < @max_price") st.text(f'Looking at {df.shape[0]:,} rows selected using above options') st.dataframe(df.head(5)) st.header("Price ") fig = px.scatter(df.query("price < 2000"), x="year", y="price", color="room_type", title="Price over years with price outliers removed") #fig.update_layout(legend_orientation='h') st.plotly_chart(fig)
# CT-scan slice viewer: two columns of metadata/controls above a single
# image placeholder that is filled in after the controls are read.
# Relies on `scan` (DICOM-like object), `img_arr` / `mask_arr` (3-D arrays,
# slice axis last) and helpers defined elsewhere in this file.
img_placeholder = st.empty()
col1, col2 = st.beta_columns(2)
with col1:
    st.write("**Pixel spacing**")
    # NOTE(review): the same scan.PixelSpacing value is shown for both x and
    # y; in DICOM PixelSpacing is usually a 2-element sequence — confirm the
    # :.2f format works on this object and that x/y aren't meant to differ.
    st.write(f"x: {scan.PixelSpacing:.2f} mm")
    st.write(f"y: {scan.PixelSpacing:.2f} mm")
    st.write(f"z: {scan.SliceSpacing:.2f} mm")
    st.write("**Device**")
    st.write(f"{scan.ManufacturerModelName} (by {scan.Manufacturer})")
with col2:
    overlay_nodules = st.checkbox("Show nodule overlay", value=True)
    # Slider is 1-based for the user; converted to a 0-based index below.
    z = st.slider("Slice:",
                  min_value=1,
                  max_value=img_arr.shape[2],
                  value=int(img_arr.shape[2] / 2))
    # Standard lung-window defaults (level -600 / width 1500).
    level = st.number_input("Window level:", value=-600)
    width = st.number_input("Window width:", value=1500)
img = get_img_slice(img_arr, z - 1, window=(level, width))
if overlay_nodules:
    # Composite the colored overlay onto the slice wherever mask is set.
    mask = get_mask_slice(mask_arr, z - 1)
    overlay = get_overlay()
    ct = Image.composite(overlay, img, mask)
    img_placeholder.image(ct, use_column_width=True)
else:
    img_placeholder.image(img, use_column_width=True)
st.subheader("Detected nodules")
index=(160)) countries_for_comparison = [] countries_for_comparison.append(country_1_selected) countries_for_comparison.append(country_2_selected) countries_for_comparison.append(country_3_selected) subselect_of_gdp = gdp_df[gdp_df['country'].isin(countries_for_comparison)] countries_for_plotting = subselect_of_gdp.pivot(index='year', columns='country', values='gdppc') countries_for_plotting = countries_for_plotting.reset_index() value = st.slider('test', min_value=1000, max_value=2018, value=(1900, 2018), step=100) for_plotting = countries_for_plotting[ (countries_for_plotting['year'] >= value[0]) & (countries_for_plotting['year'] <= value[1])] # Chart title and legends x_axis_title = 'Date' y_axis_title = 'GDP per capita' # State 1 Chart fig1 = px.line( for_plotting, x="year", y=[country_1_selected, country_2_selected, country_3_selected],
def load_data(nrows): data = pd.read_csv(DATA_URL, nrows=nrows, parse_dates=[['CRASH_DATE', 'CRASH_TIME']]) data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True) lowercase = lambda x: str(x).lower() data.rename(lowercase, axis='columns', inplace=True) data.rename(columns={'crash_date_crash_time': 'date/time'}, inplace=True) return data data = load_data(100000) original_data = data st.header('Where are the most people injured in NYC?') injured_people = st.slider('Number of persons injured in vehicle collisions', 0, 19) st.map( data.query('injured_persons >= @injured_people')[['latitude', 'longitude' ]].dropna(how='any')) st.header('How many collisions occur during a given time of day?') hour = st.slider('How to look up', 0, 23) data = data[data['date/time'].dt.hour == hour] st.markdown('Vehicle collisions between %i:00 and %i:00' % (hour, (hour + 1) % 24)) midpoint = (np.average(data['latitude']), np.average(data['longitude'])) st.write( pdk.Deck( map_style='mapbox://styles/mapbox/light-v9',
def main():
    """Iris-dataset EDA Streamlit app: load CSV, then optional views/plots.

    NOTE(review): original indentation was lost in this file; statement
    grouping under the `if st.checkbox(...)` blocks below was reconstructed
    and should be confirmed against the upstream source.
    """
    # Title and Subheader
    st.title("Iris Dataset EDA App")
    st.subheader("EDA Web App with Streamlit ")
    DATA_URL = (
        'https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv'
    )

    @st.cache(persist=True, show_spinner=True)
    def load_data():
        # Download the CSV once (cached) and normalize the column names.
        data = pd.read_csv(DATA_URL)
        data.columns = ('sepal_length', 'sepal_width', 'petal_length',
                        'petal_width', 'species')
        # lowercase = lambda x: str(x).lower()
        # data.rename(lowercase, axis='columns', inplace=True)
        # data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
        return data

    # Create a text element and let the reader know the data is loading.
    data_load_state = st.text('Loading data...')
    data = load_data()
    # Notify the reader that the data was successfully loaded.
    data_load_state.text('Loading data...Completed!')

    # Show Dataset (head/tail preview kept disabled)
    # if st.checkbox("Preview DataFrame: Head or Tail"):
    #     if st.button("Head"):
    #         st.write(data.head())
    #     if st.button("Tail"):
    #         st.write(data.tail())

    # Show Entire Dataframe
    if st.checkbox("View DataFrame"):
        st.dataframe(data)

    # Show Description
    if st.checkbox("View All Column Names"):
        st.text("Columns:")
        st.write(data.columns)

    # Dimensions - Radio Buttons (disabled)
    # data_dim = st.radio('Check the dimensions of the dataframe',('Rows','Columns'))
    # if data_dim == 'Rows':
    #     st.write("There are", len(data), "Rows in the dataset")
    # if data_dim == 'Columns':
    #     st.write("There are", data.shape[1], "Columns in the dataset")
    if st.checkbox("Show Summary of Dataset"):
        st.write(data.describe())
        # NOTE(review): this line may originally have been outside the
        # checkbox block — indentation was lost; confirm.
        st.write("There are", len(data), "rows and", data.shape[1],
                 "columns in the dataset")

    # Selection: display one chosen column
    if st.checkbox("View Single Column's Data"):
        species_option = st.selectbox(
            'Select Columns', ('sepal_length', 'sepal_width', 'petal_length',
                               'petal_width', 'species'))
        if species_option == 'sepal_length':
            st.write(data['sepal_length'])
        elif species_option == 'sepal_width':
            st.write(data['sepal_width'])
        elif species_option == 'petal_length':
            st.write(data['petal_length'])
        elif species_option == 'petal_width':
            st.write(data['petal_width'])
        elif species_option == 'species':
            st.write(data['species'])
        else:
            st.write("Select A Column")

    # Show Plots
    if st.checkbox("Show Plots"):
        st.write("_" * 10)
        data.plot(kind='scatter', x='sepal_length', y='sepal_width')
        st.pyplot()
        st.write(
            "---------------- 2D Scatter Plot of Sepal_length vs Sepal_width for all the Species ---------------- "
        )
        st.write("_" * 10)
        # Use Matplotlib to render seaborn.
        # NOTE(review): seaborn's `size=` was renamed `height=` in newer
        # versions — confirm the pinned seaborn version still accepts it.
        st.write(sns.pairplot(data, hue="species", size=3))
        st.pyplot()
        st.write(
            "---------------- Pairplot of different species ----------------")
        st.write("_" * 10)
        # NOTE(review): st.bar_chart receives a DataFrameGroupBy object here;
        # confirm this renders as intended (value_counts() may have been meant).
        v_counts = data.groupby('species')
        st.bar_chart(v_counts)
        st.write(
            "---------------- Bar Plot of Groups or Counts ----------------")
        st.write("_" * 10)

    # Iris Image Manipulation
    @st.cache
    def load_image(img):
        # Open an image from a path relative to the working directory.
        im = Image.open(os.path.join(img))
        return im

    # Image Type
    if st.checkbox("Show/Hide Images"):
        species_type = st.radio(
            'Have a look at the images of different Iris Species!',
            ('Setosa', 'Versicolor', 'Virginica'))
        if species_type == 'Setosa':
            st.text("Showing Setosa Species")
            my_image = load_image('images/setosa.png')
        elif species_type == 'Versicolor':
            st.text("Showing Versicolor Species")
            my_image = load_image('images/versicolor.png')
        elif species_type == 'Virginica':
            st.text("Showing Virginica Species")
            my_image = load_image('images/virginica.png')
        if st.checkbox("Enhance Image"):
            enh = ImageEnhance.Contrast(my_image)
            num = st.slider("Contrast", 1.0, 2.0)
            img_width = st.slider(
                "Zoom in the Image (Set Image Width in Pixels)", 300, 700)
            st.image(enh.enhance(num), width=img_width)
        else:
            # Default display: mild contrast boost at fixed width.
            img_width = 300
            num = 1.2
            enh = ImageEnhance.Contrast(my_image)
            st.image(enh.enhance(num), width=img_width)

    # About
    if st.button("About App"):
        st.subheader("Iris Dataset EDA App - Developed by Deepankar Kotnala")
        st.text("Built with Streamlit")
# Streamlit layout/widget demo (comments translated from Japanese).
# `dg` (chart data) is defined elsewhere in this file.
left_column, right_column = st.beta_columns(2)
button = left_column.button("右側にグラフを表示")  # "show chart on the right"
with right_column:
    if button:
        st.line_chart(dg)
        # st.area_chart(dg)
        # st.bar_chart(dg)

# Map
dm = pd.DataFrame({"lat": [35.702202], "lon": [139.414096]})
# NOTE(review,security): the commented-out line below contains what looks
# like a real Mapbox access token committed to source — rotate/remove it.
# map.token("pk.eyJ1IjoiaGlyb3Rha2VrMDYiLCJhIjoiY2tpbzI2NGgwMTh5dTJyanpxczBtNGZsdyJ9.8WcugSD91Zq4M5KFqPzwvg")
st.map(dm)

# Interactive Widgets
# input box
st.title("interactive wedgets")
text = st.text_input("あなたの趣味を教えて下さい。")  # "tell me your hobby"
# text = st.sidebar.text_input("あなたの趣味を教えて下さい。")  # sidebar variant
# Bare tuple below is Streamlit "magic": it is rendered to the page as-is.
"あなたの趣味:", text
condition = st.slider("あなたの調子は?", 0, 100, 50)  # "how are you feeling?"
# condition = st..sidebar.slider("あなたの調子は?", 0, 100, 50)  # sidebar variant (note: typo `st..` kept from original)
"あなたの調子:", condition

# expander
expander1 = st.beta_expander("質問")  # "question"
expander1.write("回答")  # "answer"
import streamlit as st

# Minimal Streamlit demo: a slider whose squared value is echoed back.
x = st.slider('Select a value')
# Fix: the label read 'squared id' — a typo for 'squared is' (the wording
# used by the canonical Streamlit getting-started example this copies).
st.write(x, 'squared is', x**2)
def main():
    """COVID-19 dashboard front page: scrape headline lists from Indian
    newspaper sites and render them behind a sidebar activity selector.

    NOTE(review): original indentation was lost in this file; block grouping
    below was reconstructed and should be confirmed against upstream.
    """

    # Newspaper: scrape up to `number` headlines for the chosen paper.
    # NOTE(review): the CSS selectors/URLs are hard-coded and fragile; a
    # site redesign silently returns an empty list.
    def paperHeadlines(paper, number):
        top_news = []
        if paper == 'Dainik Bhaskar':
            web_content = requests.get("https://www.bhaskar.com/coronavirus/")
            soup = BeautifulSoup(web_content.text, "html.parser")
            for a in soup.findAll('a', attrs={'class': "list_thumb"}):
                x = a.get('title')
                top_news.append(x)
        elif paper == 'Patrika':
            web_content = requests.get(
                'https://www.patrika.com/topic/coronavirus/')
            soup = BeautifulSoup(web_content.text, "html.parser")
            top = soup.find_all('div', attrs={'class': 'ctbl-text'})
            for i in top:
                top_news.append(i.text.strip())
        elif paper == 'Navbharat':
            web_content = requests.get(
                'https://navbharattimes.indiatimes.com/coronavirus/trending/74460387.cms'
            )
            soup = BeautifulSoup(web_content.text, "html.parser")
            top = soup.find_all('a', attrs={'class': 'cor_rest_art'})
            for i in top:
                top_news.append(i.text.strip())
        elif paper == 'Amarujala':
            web_content = requests.get(
                'https://www.amarujala.com/tags/corona-special-news?page=1')
            soup = BeautifulSoup(web_content.text, "html.parser")
            top = soup.find_all('h3')
            for i in top:
                top_news.append(i.text.strip())
        elif paper == 'India Today':
            web_content = requests.get('https://www.indiatoday.in/coronavirus')
            soup = BeautifulSoup(web_content.text, "html.parser")
            top = soup.find_all('h3')
            for i in top:
                top_news.append(i.text.strip())
        # Truncate to the requested number of headlines.
        return top_news[:number]
        # return top_news[:number]

    # Front Page -----------------------------------------------------------------------
    st.markdown(
        "<body style='background-color:white;'><h1 style='text-align: center; color: blue;'>REAL TIME COVID-19 ANALYSIS</h1></body>",
        unsafe_allow_html=True)
    img = Image.open('covid1.PNG')
    st.image(img, width=700)
    st.markdown(
        "<body style='background-color:CornflowerBlue;'><h3 style='text-align: center; color: green;'>Helpline Number for Corona Virus : +91-11-23978046 or 1075</h3></body>",
        unsafe_allow_html=True)
    st.markdown(
        "<a href='https://www.mohfw.gov.in//'><marquee>Click here for Guidelines by Health Ministry of India</marquee></a>",
        unsafe_allow_html=True)
    st.markdown(
        "<body style='background-color:CornflowerBlue;'><h3 style='text-align: center; color: red;'>#INDIAFIGHTSCORONA</h3></body>",
        unsafe_allow_html=True)
    st.markdown(
        "<body style='background-color:DarkTurquoise;'><h3 style='text-align: center; color: black;'>#StayHome_StaySafe</h3></body>",
        unsafe_allow_html=True)
    # NOTE(review): `day` is built but never used in this function.
    day = ['Select', 'Today', 'Yesterday', '2 Days Ago']
    st.markdown(
        "<body style='background-color:white;'><h1 style='text-align: center; color: green;'>SELECT ACTIVITIES FROM THE SIDEBAR 👈</h1></body>",
        unsafe_allow_html=True)
    activities = ["Select", "Indian News Paper Headlines"]
    st.sidebar.markdown(
        "<body style='background-color:CornflowerBlue;'><h3 style='text-align: center; color: black;'>Please Select the Activities</h3></body>",
        unsafe_allow_html=True)

    # task 1 Newspaper
    #*********************
    activity = st.sidebar.selectbox("", activities)
    if activity == activities[1]:
        st.markdown(
            "<body style='background-color:white;'><h1 style='text-align: center; color: #a84c32;'>Covid19 Newspaper Headlines</h1></body>",
            unsafe_allow_html=True)
        news = [
            'Select', 'Dainik Bhaskar', 'Patrika', 'Navbharat', 'India Today',
            'Amarujala'
        ]
        paper = st.selectbox('', news)
        if paper == 'Select':
            pass
        else:
            st.markdown(
                "<body style='background-color:white;'><h3 style='text-align: center; color: green;'>Slide through the slider to see the COVID19 news</h3></body>",
                unsafe_allow_html=True)
            number = st.slider(" ", 1, 15)
            headlines = paperHeadlines(paper, number)
            for i in headlines:
                st.info(i)
    else:
        pass
    # NOTE(review): the closing tag below is "<h3" instead of "</h3>" — the
    # HTML is malformed; left untouched here (runtime string).
    st.markdown(
        "<body style='background-color:white;'><h3 style='text-align: center; color: RED;'>*********By Ayush kumar*******<h3></body>",
        unsafe_allow_html=True)
# Planet Mountain news-roundup app: search the site for a term and collect
# (thumbnail, title, link) triples for display.
import streamlit as st
from scraping_news import pull_articles
import pandas as pd


def make_clickable(link, text):
    """Return an HTML anchor opening `link` in a new tab with label `text`."""
    return f'<a target="_blank" href="{link}">{text}</a>'


st.title("Planet Mountain News Roundup")
user_input = st.text_input(
    "What would you like to search for? (Default: '9b')", '9b')
num_pages = st.slider("Number of pages to scan", min_value=1, max_value=20)
scan_button = st.button("Scan")
# Earlier two-column rendering approach, kept for reference:
# c1, c2 = st.beta_columns(2)
# if scan_button:
#     articles, thumbs, links = pull_articles(num_pages, user_input)
#     for article in range(len(articles)):
#         c1.image(thumbs[article])
#         link = make_clickable(links[article], articles[article])
#         df = pd.DataFrame(link, columns=['Link'])
#         c2.write(df['Link'].to_html())
# else:
#     pass
if scan_button:
    articles, thumbs, links = pull_articles(num_pages, user_input)
    # Zip into (thumb, title, link) rows for downstream rendering.
    data = list(zip(thumbs, articles, links))
def main(): session_state = session.SessionState.get( key=session.get_user_id(), update=False, state_name="Acre", state_id="AC", city_name="Todos", administrative_level="Todos", refresh=False, reset=False, already_generated_user_id=None, pages_open=None, amplitude_events=None, button_styles=dict(), continuation_selection=None, button_simule=0, section1_organize=False, section2_manage=False, ) utils.localCSS("style.css") config = yaml.load(open("config/config.yaml", "r"), Loader=yaml.FullLoader) df = get_data(config) genSelectBox(df, session_state) params = dict() main_icon = utils.load_image("imgs/simulation_main_icon.png") st.write( f""" <div class="text-title-section minor-padding"> Quantos <span class="bold main-orange-span">estudantes e professores(as)</span> retornam às salas de aula em diferentes modelos? </div> <div class="container main-padding" style="padding-left:0px;"> <div class="container minor-padding main-orange-span" style="font-size: 20px; color:#FF934A; font-weight: bold;"> <img class="minor-icon" src="data:image/png;base64,{main_icon}" alt="Fonte: Flaticon"> Simule o retorno </div> <div class="minor-padding"> O retorno às atividades presenciais deve ser pensado em etapas para definir não só <b>quem pode retornar</b>, mas também <b>como</b>. Trazemos abaixo um passo a passo para construir a simulação da sua rede - experimente! </div> <div class="minor-padding" style="font-size: 20px; color:#FF934A; font-weight: bold;"> <br>Para qual etapa de ensino você está planejando? 
</div> """, unsafe_allow_html=True, ) # TODO: colocar por estado somente também # if city_name: data = df[ (df["city_name"] == session_state.city_name) & (df["administrative_level"] == session_state.administrative_level)] col1, col2 = st.beta_columns([0.9, 0.2]) with col1: education_phase = st.selectbox( "", data["education_phase"].sort_values().unique()) data = data[data["education_phase"] == education_phase] with col2: st.write( f"""<div class="container"> <br> </div> <br> """, unsafe_allow_html=True, ) st.write( f"""<br> <div class="container" style="padding-left:0px;"> <div class="minor-padding" style="font-size: 20px; color:#FF934A;"><b>1. Escolha o modelo de retorno às atividades</b></div> <div class="minor-padding"> Existem diversos modelos possíveis de retorno avaliadas de acordo com as etapas de aprendizado. Separamos abaixo 5 opções possíveis indicadas pela UNESCO. </div> </div> """, unsafe_allow_html=True, ) UNESCO_models = { 'Totalmente Presencial': { "description": """Neste modelo, todos os estudantes <b>retornam às aulas presenciais padrão</b>, isto é, os mesmos horários em sala de aula, porém seguindo os novos protocolos de distanciamento e segurança sanitária. <br><br><b>Por que este modelo?</b><br> Modelo tradicional, onde os estudantes e docentes estão habituados.""", "hours_per_day": 5, "priority": False }, 'Aulas presenciais + Tarefas remota': { "description": """Neste modelo professores(as) <b>transmitem conceitos para os estudantes presencialmente</b>, e, em seguida, <b>estudantes completam exercícios e tarefas em casa</b>. <br><br><b>Por que este modelo?</b><br> Alunos e professores mantêm um contato próximo, e estudantes podem tirar dúvidas durante a exposição da matéria.""", "hours_per_day": 3, "priority": False }, 'Aulas por vídeo + Tarefas presenciais': { "description": """Neste modelo estudantes <b>aprendem novos conceitos de forma remota</b> e, em seguida, <b>concluem exercícios e tarefas presencialmente</b> com o(a) professor(a). 
<br><br><b>Por que este modelo?</b><br> Alunos e professores mantêm o convívio, e os estudantes podem tirar dúvidas urante a realização dos exercícios e se beneficiarem com as dúvidas dos colegas.""", "hours_per_day": 2, "priority": False }, 'Grupo prioritário presencial': { "description": """Neste modelo, os professores têm uma <b>aula normal completa com um grupo de estudantes presencial, enquanto outro grupo acompanha remotamente por meio de videoconferência (VC)</b>. <br><br><b>Por que este modelo?</b> Turma mantém o convívio, mesmo que virtual, e os professores atentem todos da turma no mesmo momento.""", "hours_per_day": 5, "priority": True } } col1_1, col1_2, col1_3, col1_4 = st.beta_columns([0.35, 0.05, 0.85, 0.3]) with col1_1: params["education_model"] = st.selectbox("", list(UNESCO_models.keys())) params["priority"] = UNESCO_models[ params["education_model"]]["priority"] with col1_2: st.write( f""" <div class="container main-padding"> <br> </div> """, unsafe_allow_html=True, ) with col1_3: # Sobre o modelo st.write( f""" <div class="col light-green-simulator-bg card-simulator" style="border-radius:30px;"> <div style="font-family: 'Roboto Condensed', sans-serif; padding:10px; margin-bottom:0px; margin-top: 16px;margin-left: 16px; margin-right: 16px;"> <b>{params["education_model"]}</b> <br><br>{UNESCO_models[params["education_model"]]["description"]} <br><br><b><a href="https://en.unesco.org/sites/default/files/unesco-covid-19-response-toolkit-hybrid-learning.pdf">FONTE: UNESCO</a></b> </div> <div class="button-position" style="margin-bottom: 0px;padding: 10px;margin-top: 16px;margin-right: 16px;margin-left: 16px;"> <a href="#entenda-modelo"> <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;"> leia sobre todos os modelos > </button> </a> </div> <div class="button-position" style="margin-bottom: 0px;padding: 10px;margin-top: 16px;margin-right: 16px;margin-left: 16px;"> <a 
href="#entenda-etapa"> <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;"> veja considerações por etapa de ensino > </button> </a> </div> </div> <div id="entenda-modelo" class="info-modal-window" style="width: 80%; height: 70%;"> <div> <a href="#" title="Close" class="info-btn-close" style="color: white;">×</a> <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Modelos</h1> <div style="font-size: 16px; padding: 0px 50px 0px 50px;"> Abaixo há o quadro completo. Caso não consiga ver a imagem, clique na imagem para baixa-la ou <a href="https://drive.google.com/u/1/uc?id=1tqBItM8XkLdY9u2wk0ZcPrVcHccgdp1f&export=download">[AQUI]</a>. </div> <a href="https://drive.google.com/u/1/uc?id=1tqBItM8XkLdY9u2wk0ZcPrVcHccgdp1f&export=download"><img style="padding: 50px 50px 50px 50px;" class="images" src="https://i.imgur.com/ZByy47a.jpg"></a> </div> </div> <div id="entenda-etapa" class="info-modal-window" style="width: 80%; height: 70%;"> <div> <a href="#" title="Close" class="info-btn-close" style="color: white;">×</a> <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Etapas de Ensino</h1> <div style="font-size: 16px; padding: 0px 50px 0px 50px;"> <br> <b>4 - 8 anos</b><br> Pontos principais para consideração:<br> <li>Crianças desta faixa etária possuem menor risco de apresentar sintomas graves.</li> <li>Pais e responsáveis necessitam de creches e suporte para manter demais atividades do dia a dia</li> <li>Eficácia muito baixa do ensino remoto</li><br> <b>8 - 12 anos</b><br> Pontos principais para consideração:<br> <li>Crianças desta faixa etária possuem menor risco de apresentar sintomas graves, mas há maior dificuldade em adotar medidas sanitárias.</li> <li>Já possuem maior autonomia no cotidiano e pode</li><br> <b>12 - 17 anos</b><br> Pontos principais para consideração:<br> <li>Crianças desta faixa etária possuem maior risco intrínseco de contrair e desenvolver 
sintomas, mas apresentam maior aderência aos protocolos sanitários</li> <li>Logística de agendamento presencial pode ser mais complexa, pois os anos possuem matérias e professores diversos.</li><br> <b>17 - 18 anos</b><br> Pontos principais para consideração:<br> <li>Crianças desta faixa etária possuem maior risco intrínseco de contrair e desenvolver sintomas, mas apresentam maior aderência aos protocolos sanitários.</li> <li>Alta eficácia e adesão ao método remoto</li> <br>Abaixo há o quadro completo. Caso não consiga ver a imagem, clique na imagem para baixa-la ou <a href="https://drive.google.com/u/1/uc?id=1Sj65MXPkRcw6VxojYBLsJ8otIuvpLfq_&export=download">[AQUI]</a>. </div> <a href="https://drive.google.com/u/1/uc?id=1Sj65MXPkRcw6VxojYBLsJ8otIuvpLfq_&export=download"><img style="padding: 50px 50px 50px 50px;" class="images" src="https://i.imgur.com/FyoIFe9.jpg"></a> </div> </div> """, unsafe_allow_html=True, ) with col1_4: st.write( f"""<div class="container"> <br> </div> <br> """, unsafe_allow_html=True, ) st.write( f"""<br> <div class="container" style="padding-left:0px;"> <div class="minor-padding" style="font-size: 20px; color:#FF934A;"><b>2. 
Escolha quem pode retornar</b></div> </div> """, unsafe_allow_html=True, ) col2a_1, col2a_2, col2a_3, col2a_4 = st.beta_columns( [0.35, 0.05, 0.85, 0.3]) with col2a_1: params["number_students"] = st.number_input( "Quantos estudantes retornam às aulas presenciais?", format="%d", value=data["number_students"].values[0], step=1, ) if params["priority"]: params["number_remote_students"] = st.number_input( "Quantos estudantes acompanham às aulas somente de forma remota?", format="%d", value=data["number_students"].values[0], step=1, ) with col2a_2: st.write( f""" <div class="container main-padding"> <br> </div> """, unsafe_allow_html=True, ) with col2a_3: st.write( f""" <div class="col light-green-simulator-bg card-simulator" style="border-radius:30px;"> <div class="row" style="font-family: 'Roboto Condensed', sans-serif; margin-bottom:0px; padding:10px;"> <b>Iniciamos com total de estudantes reportados no Censo Escolar 2019 (INEP).</b> <br>Você pode alterar esse valor ao lado. Leve em consideração quais grupos de estudantes podem ser vulneráveis ou ter prioridade. 
</div> <div class="button-position" style="padding-bottom: 15px;"> <a href="#entenda-estudantes"> <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;"> grupos que requerem atencão especial > </button> </a> </div> </div> <div id="entenda-estudantes" class="info-modal-window" style="width: 80%; height: 70%;"> <div> <a href="#" title="Close" class="info-btn-close" style="color: white;">×</a> <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Estudantes</h1> <div style="font-size: 20px; padding: 0px 50px 0px 50px;"> <b>Grupos que requerem atencão especial</b> </div> <br> <div style="font-size: 16px; padding: 0px 50px 0px 50px;"> <b>Exemplos de grupos vulneráveis ou/e marginalizados</b> <li>Minorias</li> <li>Meninas adolescentes</li> <li>Crianças com deficiência de aprendizagem</li> <li>Crianças que vivem em instituições de abrigo</li> <li>Crianças vivendo em condição de pobreza, em residências com alta ocupância ou improvisadas</li> <li>Orfãos</li> <li>Crianças separadas de seus responsáveis</li> <li>Crianças e adolescentes em risco de abandono escolar</li> </div> </div> </div> """, unsafe_allow_html=True, ) with col2a_4: st.write( f"""<div class="container"> <br> </div> <br> """, unsafe_allow_html=True, ) st.write( f""" <div class="container main-padding"> <br> </div> """, unsafe_allow_html=True, ) col2b_1, col2b_2, col2b_3, col2b_4 = st.beta_columns( [0.35, 0.05, 0.85, 0.3]) with col2b_1: params["number_teachers"] = st.number_input( "Quantos professores(as) retornam?", format="%d", value=data["number_teachers"].values[0], step=1, ) col2b_2 = col2a_2 with col2b_3: st.write( f""" <div class="col light-green-simulator-bg card-simulator" style="border-radius:30px;"> <div class="row" style="font-family: 'Roboto Condensed', sans-serif; margin-bottom:0px; padding:10px;"> <b>Iniciamos com total de professores reportados no Censo Escolar 2019 (INEP).</b> <br>Você pode alterar esse valor ao 
lado. Leve em consideração quais grupos de professores podem ser de risco, confortáveis para retorno e outros. </div> <div class="button-position" style="padding-bottom: 15px;"> <a href="#entenda-professores"> <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;"> como retornar professores(as) > </button> </a> </div> <div id="entenda-professores" class="info-modal-window" style="width: 80%; height: 70%;"> <div> <a href="#" title="Close" class="info-btn-close" style="color: white;">×</a> <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Professores</h1> <div style="font-size: 16px; padding: 0px 50px 0px 50px;"> <b>Fatores a serem considerados:</b> grupos vulneráveis, número de casos suspeitos, desconforto da rede com o retorno presencial, dificuldade logística e a disponibilidade de retorno presencial. <br><br>O quadro explicativo traz para cada fator um desafio e uma ação sugerida. <br><br>Caso não consiga ver a imagem, clique na imagem para baixa-la ou <a href="https://drive.google.com/u/1/uc?id=1lLtbEMau4nIj8tZ5rQF51ThV2Q8K1DzE&export=download">[AQUI]</a>. </div> <a href="https://drive.google.com/u/1/uc?id=1lLtbEMau4nIj8tZ5rQF51ThV2Q8K1DzE&export=download"><img style="padding: 50px 50px 50px 50px;" class="images" src="https://i.imgur.com/4ai7xDK.jpg"></a> </div> </div> </div> """, unsafe_allow_html=True, ) col2b_4 = col2a_4 st.write( f""" <br> <div class="container" style="padding-left:0px;"> <div class="minor-padding" style="font-size: 20px; color:#FF934A;"><b>3. 
Defina as restrições de retorno</b></div><br> </div> </div> </div> """, unsafe_allow_html=True, ) col3_1, col3_2, col3_3, col3_4, col3_5, col3_6 = st.beta_columns( [0.35, 0.05, 0.4, 0.05, 0.4, 0.3]) with col3_1: params["number_classrooms"] = st.number_input( "Quantas salas de aula disponíveis?", format="%d", value=data["number_classroms"].values[0], step=1, ) st.write( f""" <div class="row" style="margin:0px; padding:10px; background:#DDFBF0; border-radius: 1rem 1rem 1rem 1rem;"> O número de salas restringe o número de turmas que podem voltar de forma simultânea. </div> """, unsafe_allow_html=True, ) col3_2 = col2a_2 with col3_3: params["max_students_per_class"] = st.slider( "Selecione o máximo de estudantes por turma:", 0, 20, 20, 1) st.write( f""" <div class="row" style="margin:0px; padding:10px; background:#DDFBF0; border-radius: 1rem 1rem 1rem 1rem;"> Limitamos em 20 estudantes por sala para diminiuir o risco de transmissão seguindo critérios sanitários. </div> """, unsafe_allow_html=True, ) col3_4 = col2a_2 with col3_5: params["hours_per_day"] = int( st.slider( "Selecione o número de horas presenciais diárias na escola por turma:", min_value=1, max_value=5, value=UNESCO_models[params["education_model"]] ["hours_per_day"], step=1, )) st.write( f""" <div class="row" style="margin:0px; padding:10px; background:#DDFBF0; border-radius: 1rem 1rem 1rem 1rem;"> As restrições sanitárias limitam a quantidade de tempo e estudantes que conseguem retornar à sala de aula. 
</div> <div class="container"> <br> </div> <br> """, unsafe_allow_html=True, ) col3_6 = col2a_4 with st.beta_expander("simular retorno"): user_analytics = amplitude.gen_user(utils.get_server_session()) opening_response = user_analytics.safe_log_event( "clicked simule retorno", session_state, is_new_page=True) print(params) genSimulationResult(params, config) '''if st.button("Simular retorno"): if st.button("Esconder"): pass genSimulationResult() utils.stylizeButton( name="SIMULAR RETORNO", style_string=""" box-sizing: border-box; border-radius: 15px; width: 150px;padding: 0.5em; text-transform: uppercase; font-family: 'Oswald', sans-serif; background-color: #0097A7; font-weight: bold; text-align: center; text-decoration: none;font-size: 18px; animation-name: fadein; animation-duration: 3s; margin-top: 1.5em;""", session_state=session_state, )''' # TODO: escrever metodologia v1.2 with st.beta_expander("ler metodologia"): user_analytics = amplitude.gen_user(utils.get_server_session()) opening_response = user_analytics.safe_log_event( "clicked simule metodologia", session_state, is_new_page=True) methodology_text = load_markdown_content("methodology_short.md") st.write(methodology_text)
def app():
    """Render the PDF-search page: upload a PDF, extract its text hierarchy,
    show word statistics, and run a TF-IDF cosine-similarity search whose
    results are logged to a Firestore collection.

    NOTE(review): source formatting was lost; indentation below is a
    reconstruction — section nesting (expander vs. page level) should be
    confirmed against the original app.
    """

    def text_on_page(dict_var, id_json, list_res, page):
        # Recursively walk the parsed-PDF JSON tree and collect the "text"
        # of every node whose `id_json` field equals `page`.
        if type(dict_var) is dict:
            for k, v in dict_var.items():
                if k == id_json and v == page:
                    # NOTE(review): `v > page` can never hold after
                    # `v == page` — this early return is dead code.
                    if v > page:
                        return list_res
                    list_res.append(dict_var["text"])
                elif isinstance(v, dict):
                    text_on_page(v, id_json, list_res, page)
                elif isinstance(v, list):
                    for item in v:
                        text_on_page(item, id_json, list_res, page)
        return list_res

    def get_page(data, page):
        # All text chunks of one page, in document order.
        lines = []
        for chunk in data["elements"]:
            lines.extend(text_on_page(chunk, "page", [], page))
        return lines

    def get_histogram(docs, top=20):
        # Token frequencies over all documents; returns the `top` most
        # common tokens and their counts, most frequent first.
        tokens = []
        for s in docs.values():
            tokens += s.split()
        uniques, counts = np.unique(tokens, return_counts=True)
        sorted_inds = np.argsort(counts)
        uniques_sorted = uniques[sorted_inds[-top:]][::-1]
        counts_sorted = counts[sorted_inds[-top:]][::-1]
        return (uniques_sorted, counts_sorted)

    file = st.file_uploader("test", type="pdf", key=2)
    start = 1
    max_val = 1000
    end = 5  # NOTE(review): parsing is capped at the first 5 pages regardless of the slider.
    slider_val = st.slider('Page range:',
                           min_value=start,
                           max_value=max_val,
                           value=(1, end),
                           step=1)
    if file is not None:
        file_details = {
            "FileName": file.name,
            "FileType": file.type,
            "FileSize": str(file.size / 1000000) + 'mb'
        }
        data_load_state = st.text('Loading data... Thank you for waiting 😊')
        parser = HierarchyParser()
        source = FileSource(file, page_numbers=list(range(start - 1, end)))

        @st.cache(suppress_st_warning=True)
        def fetch_doc(source):
            # Cached so re-runs of the script skip the expensive parse.
            return parser.parse_pdf(source)

        document = fetch_doc(source)
        printer = JsonFilePrinter()
        file_path = pathlib.Path('pdf.json')
        printer.print(document, file_path=str(file_path.absolute()))
        with open('pdf.json') as json_file:
            data = json.load(json_file)
        json_file.close()  # NOTE(review): redundant — `with` already closed it.
        # NOTE(review): upper bound is exclusive, so the last slider page is
        # not included — confirm whether that is intended.
        pages = {
            i: get_page(data, i)
            for i in range(slider_val[0], slider_val[1])
        }
        (formatted_docs,
         paragraph_page_idx) = preprocessing2.get_formatted_docs(
             pages, max_paragraphs=5)
        preprocessed_docs = preprocessing2.get_preprocessed_docs(
            formatted_docs)
        data_load_state.text("Done!")
        st.write(file_details)
        with st.beta_expander("PDF Extraction details"):
            st.subheader('First paragraphs on page ' + str(slider_val[0]))
            # Show at most the first five paragraphs of the first selected page.
            if len(pages[slider_val[0]]) >= 5:
                for i in range(5):
                    st.markdown("<u>¶ " + str(i + 1) + "</u>: " +
                                pages[slider_val[0]][i],
                                unsafe_allow_html=True)
            else:
                for i in range(len(pages[slider_val[0]])):
                    st.markdown("<u>¶ " + str(i + 1) + "</u>: " +
                                pages[slider_val[0]][i],
                                unsafe_allow_html=True)
            st.subheader('PDF word distribution')
            (uniques, counts) = get_histogram(preprocessed_docs)
            fig = px.bar(x=uniques, y=counts)
            fig.update_xaxes(title_text='words')
            fig.update_yaxes(title_text='occurances')
            st.plotly_chart(fig)
            st.subheader('Paragraph similarity heatmap')
        # TF-IDF matrix over all preprocessed paragraphs for cosine search.
        tfidf_vectorizer = cosine2.get_tfidf_vectorizer()
        tfidf_matrix = tfidf_vectorizer.fit_transform(
            list(preprocessed_docs.values())).toarray()
        query1 = st.text_input("Cosine-SVD Search")
        if query1:
            q = cosine2.get_query_vector(query1, tfidf_vectorizer)
            cos_sims = cosine2.get_cosine_sim(q, tfidf_matrix)
            (rankings, scores) = cosine2.get_rankings(cos_sims)
            idx = rankings[0]
            score = scores[0]
            page_num = paragraph_page_idx[idx] + 1
            doc = formatted_docs[idx]
            if score > 0.0:
                st.subheader("Similarity: " + str(score))
                st.markdown("<u>Match</u>: " + str(doc),
                            unsafe_allow_html=True)
                st.markdown("<u>Page Number</u>: " + str(page_num),
                            unsafe_allow_html=True)
                # Write match and query to the db.
                doc_ref = db.collection("queries").document()
                doc_ref.set({
                    "query": query1,
                    "topMatch": str(doc),
                    "timeStamp": firestore.SERVER_TIMESTAMP,
                    "upvote": 0
                })
            else:
                st.subheader("No matches found.")
        st.write("Following methods are under construction 😊 Stay tuned!")
        # Placeholders — their values are never used below.
        query2 = st.text_input("Synonymized Query Search")
        query3 = st.text_input("Verbatim Search")
        st.subheader("Recent search results:")
        q_ref = db.collection("queries").order_by(
            u'timeStamp', direction=firestore.Query.DESCENDING)
        counter = 0
        yesButtons = []
        noButtons = []
        # Show the five most recent stored queries with vote buttons.
        for doc in q_ref.stream():
            counter += 1
            doc_dict = doc.to_dict()
            st.markdown("<strong>Query " + str(counter) + "</strong>: \n",
                        unsafe_allow_html=True)
            st.markdown("<u>Query</u>: " + doc_dict["query"] + "\n",
                        unsafe_allow_html=True)
            st.markdown("<u>Top Match</u>: " + doc_dict["topMatch"] + "\n",
                        unsafe_allow_html=True)
            st.markdown(" ")
            if doc_dict["upvote"] < 0:
                st.markdown("<small>So far " +
                            str(abs(doc_dict["upvote"])) +
                            "people don't think it's a good match.</small>",
                            unsafe_allow_html=True)
            else:
                st.markdown("<small>So far " + str(doc_dict["upvote"]) +
                            " people think it's a good match.</small>",
                            unsafe_allow_html=True)
            st.markdown(
                "<i><small>Do you think this is a good match?</small></i>",
                unsafe_allow_html=True)
            yesButtons.append(st.button("👍", key="YesButton" + str(counter)))
            noButtons.append(st.button("👎", key="NoButton" + str(counter)))
            st.markdown("<hr>", unsafe_allow_html=True)
            if counter == 5:
                break
        st.subheader('made with ❤️ by:')
        st.markdown(
            '[Vince Bartle](https://bartle.io) (vb344) | [Dubem Ogwulumba](https://www.linkedin.com/in/dubem-ogwulumba/) (dao52) | [Erik Ossner](https://erikossner.com/) (eco9) | [Qiyu Yang](https://github.com/qiyuyang16/) (qy35) | [Youhan Yuan](https://github.com/nukenukenukelol) (yy435)'
        )
# NOTE(review): FRAGMENT — this is the body of a per-column widget-building
# branch; the opening `if` matching the second `else:` below (presumably a
# numeric-dtype check on `df[col]`) lies outside this chunk, so this span is
# not valid standalone syntax.
mn = df[col].min().item()
mx = df[col].max().item()
defv = df[col].mean()
if mx > mn:
    # Step = 1% of the range, rounded to one significant figure.
    sp = (mx - mn) / 100
    sp = round(sp, 1 - int(floor(log10(abs(sp)))) - 1)
    if df[col].dtype == np.int64:
        # Integer columns get integer default and step.
        defv = np.ceil(defv).astype(int).item()
        sp = np.ceil(sp).astype(int).item()
    val = st.slider(
        "{} ({})".format(col, df[col].dtype),
        min_value=mn,
        max_value=mx,
        step=sp,
        value=defv,
    )
else:
    # Constant column: nothing to slide over.
    val = mx
else:
    # Non-numeric column: pick from its distinct values, default = mode.
    defv = df[col].mode().item()
    uniq = pd.unique(df[col]).tolist()
    val = st.selectbox(
        "{} ({})".format(col, df[col].dtype), options=uniq, index=uniq.index(defv)
    )
list_form += [val]
default_form += [defv]
import numpy as np
import pandas as pd  # FIX: `pd` was used below but never imported (NameError).
from scipy import optimize
from matplotlib import pyplot as pl


def dynes(E, gamma, delta, N0):
    """Dynes density-of-states function for a superconductor.

    Args:
        E: Bias energy (scalar or array).
        gamma: Broadening (pair-breaking) parameter.
        delta: Superconducting gap.
        N0: Normal-state density of states (overall scale).

    Returns:
        N0 * |Re[(E - i*gamma) / sqrt((E - i*gamma)^2 - delta^2)]|.
    """
    numerator = E - gamma * 1j
    denominator = np.sqrt((E - gamma * 1j)**2 - delta**2)
    return N0 * np.abs(np.real(numerator / denominator))


# STM conductance data: two columns, bias voltage and dI/dV.
f = 'stm.csv'
stm = pd.read_csv(f, names=['bias', 'didv'])

# Fit the Dynes model; p = [gamma, delta, N0] best-fit values.
p, cov = optimize.curve_fit(dynes, stm.bias, stm.didv, p0=[0.2, 3, 1])

# Sliders initialised at the fitted values so the user can explore around
# the optimum.  NOTE(review): `st` is assumed to be `streamlit`, imported
# earlier in this file — confirm.
gamma = st.slider('gamma', min_value=0.0, max_value=1.0, step=0.01,
                  value=float(p[0]))
delta = st.slider('delta', min_value=0.0, max_value=4.0, step=0.1,
                  value=float(p[1]))
N0 = st.slider('N0', min_value=0.0, max_value=1.2, step=0.01,
               value=float(p[2]))

# Overlay the (possibly user-adjusted) model on the measured data.
fit = dynes(stm['bias'], gamma, delta, N0)
fig = pl.figure()
pl.plot(stm['bias'], stm['didv'], '-o', color='blue')
pl.plot(stm['bias'], fit, '-', color='red')
st.pyplot(fig)
def write():
    """Used to write the page in the app.py file.

    Renders an asylum-statistics map page: a Choropleth map (applications per
    country) or a Line map (migration routes), filtered by year, age band and
    gender from sidebar widgets.

    NOTE(review): source formatting was lost; indentation and the exact
    whitespace inside the long HTML/CSS string literals are reconstructions.
    """
    with st.spinner("Loading Map ..."):
        #ast.shared.components.title_awesome("")  # Awesome Streamlit title (hidden)
        # read CSV
        # CSV for Choropleth Map
        df = pd.read_csv(
            "https://raw.githubusercontent.com/hannahkruck/awesome-test/master/Map.csv",
            encoding="utf8",
            sep=";")
        # CSV for Line Map (same file, filtered independently below)
        df2 = pd.read_csv(
            "https://raw.githubusercontent.com/hannahkruck/awesome-test/master/Map.csv",
            encoding="utf8",
            sep=";")
        # Title
        st.title("Map view")
        #----------------- Side bar (filter options) -------------------
        # Select map (Choropleth or Line Map); exactly one trace is visible.
        selectedMapType = st.sidebar.radio("Map",
                                           ('Choropleth Map', 'Line Map'))
        if selectedMapType == 'Choropleth Map':
            showChoropleth = True
            showLine = False
        else:
            showLine = True
            showChoropleth = False
        # General filter (Age, Gender)
        st.sidebar.header("Filters")
        selectedAge = st.sidebar.multiselect(
            "Select Age", ("under 18", "18 - 34", "35 - 64", "over 65"))
        selectedGender = st.sidebar.selectbox("Select Gender",
                                              ("All", "Male", "Female"))
        # Special filter for Choropleth Map
        st.sidebar.header("Filter for Choropleth Map")
        # Drop down menu for Choropleth Map Information
        selectedMapChoropleth = st.sidebar.selectbox(
            "Select Map Information",
            ('Applications to target countries',
             'Applicants by country of origin'))
        # Map the human-readable choice onto column names and a colour scale.
        if 'target' in selectedMapChoropleth:
            selectedMapChoropleth = 'destinationCountry'
            selectedCode = 'geoCodeDC'
            mapColor = 'Blues'
        else:
            selectedMapChoropleth = 'homeCountry'
            selectedCode = 'geoCodeHC'
            mapColor = 'Reds'
        # Special filter for Line Map
        st.sidebar.header("Filter for Line Map")
        # Select type (routes FROM an origin country or TO a target country)
        selectedType = st.sidebar.radio("Select type",
                                        ('Target country', 'Origin country'))
        if selectedType == 'Target country':
            selectedType = df.destinationCountry.unique()
            countryCategory = 'destinationCountry'
            namesToShow = 'homeCountry'
            selectedLon = 'lonDC'
            selectedLat = 'latDC'
        else:
            selectedType = df.homeCountry.unique()
            countryCategory = 'homeCountry'
            namesToShow = 'destinationCountry'
            selectedLon = 'lonHC'
            selectedLat = 'latHC'
        # Drop down menu for selected country
        selectedCountryMapLine = st.sidebar.selectbox("Select country",
                                                      (selectedType))
        #----------------- Website content (Year slider, i-Button) -------------------
        # Markdown for i-Button: CSS for the hover tooltip.
        st.markdown('''
<!-- https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_transition & https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_right-->
<style>
.tooltip {
  position: relative;
  display: inline-block;
  font-size:1.6rem;
}
.tooltip .tooltiptext {
  visibility: hidden;
  width: 50vw;
  background-color: #f1f3f7;
  color: #262730;
  text-align: justify;
  border-radius: 6px;
  padding: 5px;
  font-size:0.9rem;
  /* Position the tooltip */
  position: absolute;
  z-index: 1;
  top: -5px;
  left: 105%;
  opacity: 0;
  transition: opacity 0.8s;
}
.tooltip:hover .tooltiptext {
  visibility: visible;
  opacity: 1;
}
</style>
''',
                    unsafe_allow_html=True)
        # Text for tooltip
        st.markdown('''
<div class="tooltip">ⓘ
<span class="tooltiptext">
<b>Choropleth Map</b><br>The Choropleth Map shows the number of asylum applications per country in Europe and the number of refugees per country worldwide for the selected year (see filter 'Select Map Information' for Choropleth Map).
<br><br>
<b>Line Map</b><br>The Line Map presents the routes of the refugees depending on the selected type. The type 'target country' shows from which countries the asylum seekers originate based on a specific target country. The type 'origin country' indicates where the asylum seekers are fleeing to from a specific country of origin.
</span></div>
''',
                    unsafe_allow_html=True)
        # Slider to choose the year
        selected_year = st.slider("", (int(df["year"].min())),
                                  (int(df["year"].max())))
        # Title for map regarding the chosen year
        st.subheader('Asylum seekers in the year %s' % selected_year)
        #----------------- Data preparation (general) -------------------
        # Remove 'Overall' and 'Überseeische Länder und Hoheitsgebiete'
        # aggregate rows from both frames.
        indexNames = df[df['destinationCountry'] == 'Overall'].index
        df.drop(indexNames, inplace=True)
        indexNames = df[df['homeCountry'] == 'Overall'].index
        df.drop(indexNames, inplace=True)
        indexNames = df[df['destinationCountry'] ==
                        'Überseeische Länder und Hoheitsgebiete'].index
        df.drop(indexNames, inplace=True)
        indexNames = df[df['homeCountry'] ==
                        'Überseeische Länder und Hoheitsgebiete'].index
        df.drop(indexNames, inplace=True)
        indexNames = df2[df2['destinationCountry'] == 'Overall'].index
        df2.drop(indexNames, inplace=True)
        indexNames = df2[df2['homeCountry'] == 'Overall'].index
        df2.drop(indexNames, inplace=True)
        indexNames = df2[df2['destinationCountry'] ==
                         'Überseeische Länder und Hoheitsgebiete'].index
        df2.drop(indexNames, inplace=True)
        indexNames = df2[df2['homeCountry'] ==
                         'Überseeische Länder und Hoheitsgebiete'].index
        df2.drop(indexNames, inplace=True)
        # Delete all cells, except one year (both maps)
        indexNames = df[df['year'] != selected_year].index
        df.drop(indexNames, inplace=True)
        indexNames = df2[df2['year'] != selected_year].index
        df2.drop(indexNames, inplace=True)
        #----------------- Data preparation (Choropleth Map) -------------------
        # Build df['subtotal'] from the selected gender/age columns;
        # `a` names the column that finally feeds the map ('subtotal' or 'total').
        df['subtotal'] = 0
        # Check selected gender
        if selectedGender == 'Female':
            # if an age is selected
            if selectedAge:
                # selectedAge is a list of strings; sum each selected band
                # into the subtotal column.
                for i in selectedAge:
                    if i == 'under 18':
                        df['subtotal'] = df['subtotal'] + df['fu18']
                    elif i == '18 - 34':
                        df['subtotal'] = df['subtotal'] + df['f18']
                    elif i == '35 - 64':
                        df['subtotal'] = df['subtotal'] + df['f35']
                    elif i == 'over 65':
                        df['subtotal'] = df['subtotal'] + df['fo65']
            else:
                # no age is selected, that means the user wants to see all women
                df['subtotal'] = df['subtotal'] + df['womenTotal']
            a = 'subtotal'
        elif selectedGender == 'Male':
            if selectedAge:
                for i in selectedAge:
                    if i == 'under 18':
                        df['subtotal'] = df['subtotal'] + df['mu18']
                    elif i == '18 - 34':
                        df['subtotal'] = df['subtotal'] + df['m18']
                    elif i == '35 - 64':
                        df['subtotal'] = df['subtotal'] + df['m35']
                    elif i == 'over 65':
                        df['subtotal'] = df['subtotal'] + df['mo65']
            else:
                df['subtotal'] = df['subtotal'] + df['menTotal']
            a = 'subtotal'
        else:
            # if no gender is selected, that means the user wants to see all
            if selectedAge:
                for i in selectedAge:
                    if i == 'under 18':
                        df['subtotal'] = df['subtotal'] + df['mu18'] + df[
                            'fu18']
                    elif i == '18 - 34':
                        df['subtotal'] = df['subtotal'] + df['m18'] + df['f18']
                    elif i == '35 - 64':
                        df['subtotal'] = df['subtotal'] + df['m35'] + df['f35']
                    elif i == 'over 65':
                        df['subtotal'] = df['subtotal'] + df['fo65'] + df[
                            'mo65']
                a = 'subtotal'
            else:
                a = 'total'
        # Group the countries by year and sum up the number in a new column 'sum'.
        df['sum'] = df.groupby([selectedMapChoropleth,
                                'year'])[a].transform('sum')
        #----------------- Data preparation (Line Map) -------------------
        # countryCategory = homeCountry or destinationCountry
        # selectedCountryMapLine is the selected country for the line map
        # (for example Syria (homeCountry)).
        indexNames = df2[df2[countryCategory] != selectedCountryMapLine].index
        df2.drop(indexNames, inplace=True)
        df2['subtotal'] = 0
        if selectedGender == 'Female':
            # if an age is selected
            if selectedAge:
                # Drop rows whose selected age column is zero, then
                # accumulate the column into subtotal.
                for i in selectedAge:
                    if i == 'under 18':
                        indexNames = df2[df2['fu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['fu18']
                    elif i == '18 - 34':
                        indexNames = df2[df2['f18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['f18']
                    elif i == '35 - 64':
                        indexNames = df2[df2['f35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['f35']
                    elif i == 'over 65':
                        indexNames = df2[df2['fo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['fo65']
            else:
                indexNames = df2[df2['womenTotal'] == 0].index
                df2.drop(indexNames, inplace=True)
                df2['subtotal'] = df2['subtotal'] + df2['womenTotal']
        elif selectedGender == 'Male':
            if selectedAge:
                for i in selectedAge:
                    if i == 'under 18':
                        indexNames = df2[df2['mu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mu18']
                    elif i == '18 - 34':
                        indexNames = df2[df2['m18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m18']
                    elif i == '35 - 64':
                        indexNames = df2[df2['m35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m35']
                    elif i == 'over 65':
                        indexNames = df2[df2['mo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mo65']
            else:
                indexNames = df2[df2['menTotal'] == 0].index
                df2.drop(indexNames, inplace=True)
                df2['subtotal'] = df2['subtotal'] + df2['menTotal']
        else:
            # if no gender is selected, that means the user wants to see all
            if selectedAge:
                for i in selectedAge:
                    if i == 'under 18':
                        indexNames = df2[df2['mu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['fu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mu18'] + df2[
                            'fu18']
                    elif i == '18 - 34':
                        indexNames = df2[df2['m18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['f18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m18'] + df2[
                            'f18']
                    elif i == '35 - 64':
                        indexNames = df2[df2['m35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['f35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m35'] + df2[
                            'f35']
                    elif i == 'over 65':
                        indexNames = df2[df2['mo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['fo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mo65'] + df2[
                            'fo65']
            else:
                # all people are considered
                indexNames = df2[df2['total'] == 0].index
                df2.drop(indexNames, inplace=True)
        # Create list of origin or target countries to display them in hover text.
        # Every second index must contain the country name, so a placeholder is
        # necessary in front of it.
        # Structure: [placeholder, name+number, placeholder, name+number, ...]
        # name = listPlaceholderNames, number = listPlaceholderNumber
        listPlaceholderNames = df2[namesToShow].values.tolist()
        listPlaceholderNumber = df2[a].values.tolist()
        nameList = []
        i = 0
        if namesToShow == 'homeCountry':
            for x in listPlaceholderNames:
                nameList.append(i)
                x = x + ': ' + str(listPlaceholderNumber[i])
                nameList.append(x)
                i = i + 1
            if len(nameList) != 0:
                nameList[-2] = None
        else:
            for x in listPlaceholderNames:
                x = x + ': ' + str(listPlaceholderNumber[i])
                nameList.append(x)
                nameList.append(i)
                i = i + 1
            if len(nameList) != 0:
                nameList[-1] = None
        # CSS hack: lay the radio buttons out horizontally.
        st.write(
            '<style>div.Widget.row-widget.stRadio > div{flex-direction:row;}</style>',
            unsafe_allow_html=True)
        #----------------Create Maps with Plotly (Choropleth and Line Map)---------------------------
        #Link Toggle Map https://plotly.com/python/custom-buttons/
        fig = go.Figure()
        # Choropleth Map
        fig.add_trace(
            go.Choropleth(
                locations=df[selectedCode],
                visible=showChoropleth,
                z=df['sum'],
                text=df[selectedMapChoropleth],
                colorscale=mapColor,
                autocolorscale=False,
                reversescale=False,
                name="",
                marker_line_color='darkgray',
                marker_line_width=0.5,
                colorbar_tickprefix='',
                colorbar_title='Number of<br>asylum<br>applications<br>',
            ))
        # Line Map: one marker per route endpoint of the selected country.
        fig.add_trace(
            go.Scattergeo(locationmode='country names',
                          lon=df2[selectedLon],
                          lat=df2[selectedLat],
                          hoverinfo='text',
                          name=selectedCountryMapLine,
                          text=df2[countryCategory],
                          line=dict(width=1, color='red'),
                          opacity=0.510,
                          visible=showLine,
                          mode='markers',
                          marker=dict(size=3,
                                      color='rgb(255, 0, 0)',
                                      line=dict(
                                          width=3,
                                          color='rgba(68, 68, 68, 0)',
                                      ))))
        # Interleave destination/home coordinates so each consecutive pair
        # draws one origin→destination segment.
        lons = []
        lats = []
        lons = np.empty(2 * len(df2))
        lons[::2] = df2['lonDC']
        lons[1::2] = df2['lonHC']
        lats = np.empty(2 * len(df2))
        lats[::2] = df2['latDC']
        lats[1::2] = df2['latHC']
        #hallo = 'testi'
        fig.add_trace(
            go.Scattergeo(locationmode='country names',
                          visible=showLine,
                          name='route and number <br>of asylum seekers',
                          text=nameList,
                          hovertemplate=nameList,
                          lon=lons,
                          lat=lats,
                          mode='markers+lines',
                          line=dict(width=1, color='red'),
                          opacity=0.5))
        fig.update_layout(
            showlegend=True,
            geo=go.layout.Geo(
                scope='world',
                #projection_type = 'azimuthal equal area',
                showland=True,
                showcountries=True,
                landcolor='rgb(243, 243, 243)',
                countrycolor='rgb(105,105,105)',
            ),
        )
        fig.update_layout(
            geo=dict(showframe=False,
                     showcoastlines=False,
                     projection_type='equirectangular'),
            autosize=True,
            margin=dict(
                l=0,
                r=0,
                b=0,
                t=20,
            ),
        )
        # Display figure
        st.plotly_chart(fig,
                        use_container_width=True,
                        config={
                            'modeBarButtonsToRemove':
                            ['lasso2d', 'select2d', 'pan2d', 'hoverClosestGeo']
                        })
def get_data(nrows):
    """Load up to `nrows` NYC collision records from DATA_URL.

    Parses CRASH_DATE+CRASH_TIME into a single 'date/time' column, drops
    rows without coordinates, and lower-cases all column names.
    NOTE(review): DATA_URL is defined elsewhere in this file.
    """
    data = pd.read_csv(DATA_URL,
                       nrows=nrows,
                       parse_dates=[["CRASH_DATE", "CRASH_TIME"]])
    data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis="columns", inplace=True)
    data.rename(columns={"crash_date_crash_time": "date/time"}, inplace=True)
    return data


data = get_data(100000)
original_data = data  # keep an unfiltered reference before hour filtering
st.header("Where are the most people injured in NYC?")
injured_people = st.slider("No of people injured", 0, 19)
# Map of collisions with at least the chosen number of injuries.
st.map(
    data.query("injured_persons >= @injured_people")[["latitude", "longitude"
                                                      ]].dropna(how="any"))
st.header("How many collisions occur during a givn time of a day?")
#hour = st.sidebar.slider("Hour to look at", 0, 23)
hour = st.slider("Hour to look at", 0, 23)
data = data[data['date/time'].dt.hour == hour]
midpoint = (np.average(data["latitude"]), np.average(data["longitude"]))
st.markdown("Vehicle collision between %i:00 and and %i:00" %
            (hour, (hour + 1) % 24))
# NOTE(review): FRAGMENT — the pdk.Deck(...) call is truncated at the end of
# this chunk; its arguments continue outside the visible source.
st.write(
    pdk.Deck(
# NOTE(review): FRAGMENT — `row0_1`, `row0_2`, `set_hist_color` (layout
# columns) and `df` (heart-disease dataset) are created earlier in the file.
with row0_1:
    # Feature to histogram; defaults to the "age" column.
    hist_x = st.selectbox("Select a feature",
                          options=df.columns,
                          index=df.columns.get_loc("age"))
with row0_2:
    bar_mode = st.selectbox("Select barmode", ["relative", "group"], 0)
with set_hist_color:
    # Categorical column used to colour the bars.
    hist_color = st.selectbox(
        'Select categorical color option',
        ["sex", 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal', 'target'],
        0)
hist_bins = st.slider(label="Histogram bins",
                      min_value=5,
                      max_value=50,
                      value=25,
                      step=1,
                      key='h1')
# hist_cats = df['Outcome'].sort_values().unique()
# Sorted category order so bars appear in a stable order.
hist_cats = df[hist_x].sort_values().unique()
hist_fig1 = px.histogram(df,
                         x=hist_x,
                         nbins=hist_bins,
                         title="Histogram of " + hist_x,
                         template="plotly_white",
                         color=f'{hist_color}',
                         barmode=bar_mode,
                         color_discrete_map=dict(noDM='green', DM='red'),
                         category_orders={hist_x: hist_cats})
st.write(hist_fig1)
# main.py
# Minimal Streamlit demo: cluster the iris dataset with K-means and show a
# 2-D PCA projection coloured by cluster label.
import streamlit as st
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load the classic iris measurements (150 samples, 4 features).
iris = datasets.load_iris()

# Page intro.
st.markdown("""# Iris Cluster App
It's so easy to build Machine Learning applications! """)

# Let the user choose the number of clusters.
n = st.slider("n clusters: ", min_value=2, max_value=10, value=3)

# Fit K-means and obtain one cluster label per sample.
labels = KMeans(n_clusters=n).fit_predict(iris.data)

# Project the 4-D data down to 2-D for display and colour by cluster.
embedding = PCA(n_components=2).fit_transform(iris.data)
plt.scatter(embedding[:, 0], embedding[:, 1], c=labels)
st.pyplot()  # render the current matplotlib figure in the app
# NOTE(review): FRAGMENT — this loop and the `return p_dict` below are the
# tail of a function whose definition lies outside this chunk (`pt`, `p_dict`
# and `x` come from that enclosing scope), so this span is not valid
# standalone syntax.
for value in np.arange(0, 1, .05):
    # Bucket pt into 0.05-wide bins keyed by the rounded lower edge.
    if pt >= value and pt < value + .05:
        p_dict[round(value, 2)] += 1
for key in p_dict.keys():
    # Convert counts to relative frequencies.
    p_dict[key] /= len(x)
return p_dict

if page == "Gambler's Ruin II":
    st.title("Gambler's Ruin II")
    # Starting bankroll, stopping target, win probability, and match count.
    starting = st.number_input("Enter a dollar amount", min_value=0, value=10)
    win = st.number_input("Enter the number you would like to stop at",
                          min_value=0,
                          value=20)
    p = st.slider("Enter the probability of winning (p)",
                  min_value=.01,
                  max_value=1.0,
                  value=.5)
    n = st.number_input("Enter the number of matches to play",
                        min_value=1,
                        value=100)
    # Initial distribution: all mass on the starting bankroll.
    initial = np.zeros(win + 1)
    initial[starting] = 1
    # Markov transition matrix: 0 and `win` are absorbing; otherwise move
    # up with probability p and down with probability 1-p.
    matrix = np.zeros((win + 1, win + 1))
    for loc in range(win + 1):
        if loc == 0 or loc == win:
            matrix[loc, loc] = 1
        else:
            matrix[loc, loc + 1] = p
            matrix[loc, loc - 1] = 1 - p
# Uber-pickups demo (Streamlit getting-started tutorial shape).
DATE_COLUMN = 'date/time'
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')


@st.cache
def load_data(nrows):
    """Fetch the first `nrows` pickup records and normalise the frame.

    Column names are lower-cased and the date/time column is parsed to
    datetimes; the result is cached across reruns by @st.cache.
    """
    frame = pd.read_csv(DATA_URL, nrows=nrows)
    frame.rename(lambda name: str(name).lower(), axis='columns', inplace=True)
    frame[DATE_COLUMN] = pd.to_datetime(frame[DATE_COLUMN])
    return frame


# Load with a visible progress message.
data_load_state = st.text('loading data...')
data = load_data(1000)
data_load_state.text('loading data... done!')
st.write('Done! with cache')

# Optional raw-data view.
if st.checkbox('show raw data'):
    st.subheader('Raw data')
    data  # Streamlit "magic": a bare expression is rendered as a table

# Pickups per hour of day.
st.subheader('number of pickups per hour')
hist_values = np.histogram(data[DATE_COLUMN].dt.hour,
                           bins=24,
                           range=(0, 24))[0]
st.bar_chart(hist_values)

# Drill into a single hour chosen by the user (default 17:00).
hour_to_filter = st.slider('hour', 0, 23, 17)
st.subheader('all pickup data at {}:00'.format(hour_to_filter))
same_hour = data[DATE_COLUMN].dt.hour == hour_to_filter
data[same_hour]
def main():
    """Render the "Data Viz" exploratory-data-analysis page.

    Lets the user upload a CSV and, via sidebar checkboxes, inspect its
    shape, columns, dtypes, target counts, summary statistics, and several
    chart types.  UI labels are Portuguese (user-facing); comments are
    English.
    """
    # Header image and titles.
    st.image(
        'https://sc.movimentoods.org.br/wp-content/uploads/2019/10/qualirede.png',
        width=400)
    st.header('Um jeito simples de visualizar e analisar seus dados')
    st.sidebar.title('Data Viz')

    # CSV uploaded by the user.
    file = st.file_uploader('Escolha a base de dados CSV', type='csv')
    if file is not None:
        # Sidebar decoration and section header.
        st.sidebar.image(
            'https://media.giphy.com/media/1klslCB8tbUmN4QoD4/giphy.gif',
            width=250)
        st.sidebar.header("Selecione:")

        # How many rows of the dataset to preview.
        columns = st.slider('Quantas linhas deseja ver?',
                            min_value=1,
                            max_value=50)
        st.markdown('**Seu arquivo**:')
        data = pd.read_csv(file)
        st.dataframe(data.head(columns))

        # Dataset shape.
        if st.sidebar.checkbox('Quero ver o shape dos meus dados'):
            st.markdown('**Quantidade de linhas:** ')
            st.markdown(data.shape[0])
            st.markdown('**Quantidade de colunas:**')
            st.markdown(data.shape[1])

        # Column inspection (deduplicated view of the chosen columns).
        if st.sidebar.checkbox('Quero analisar as colunas'):
            all_columns = data.columns.tolist()
            selected_columns = st.multiselect('Selecione', all_columns)
            # drop_duplicates() defaults already mean subset=None, keep='first'.
            new_df = data[selected_columns].drop_duplicates()
            st.dataframe(new_df)

        # Target/class counts.  FIX: label typo 'contas' -> 'contar'.
        if st.sidebar.checkbox('Quero contar a quantidade de target/classes'):
            selected_plot = st.selectbox('Selecione o tipo de visualização',
                                         list(data.columns))
            st.markdown('**Contagem de Alvos/Classes**')
            # Simpler than iloc[:, get_loc(...)] and identical for unique labels.
            st.write(data[selected_plot].value_counts())

        # Column dtypes.
        if st.sidebar.checkbox('Quero ver os tipos dos dados'):
            st.markdown('**Tipos de dados**')
            st.write(data.dtypes)

        # Summary statistics.
        if st.sidebar.checkbox('Quero a descrição dos meus dados'):
            st.markdown('**Descrição**')
            st.write(data.describe())

        # Chart picker.
        if st.sidebar.checkbox('Quero visualizar meus dados'):
            columns_names = data.columns.tolist()
            viz = ('line', 'bar', 'pie', 'hist', 'correlation', 'box')
            selected_plot = st.sidebar.selectbox(
                'Selecione o tipo de visualização', viz)
            selected_columns_names = st.multiselect('Selecione as colunas',
                                                    columns_names)
            if selected_plot == 'line':
                st.line_chart(data[selected_columns_names])
            elif selected_plot == 'bar':
                st.bar_chart(data[selected_columns_names])
            elif selected_plot == 'pie':
                # Pie of the LAST column's value counts (column choice is fixed).
                st.write(
                    data.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
                st.pyplot()
            elif selected_plot == 'correlation':
                corr = data.corr()
                st.write(
                    sns.heatmap(corr,
                                xticklabels=corr.columns,
                                yticklabels=corr.columns,
                                annot=True))
                st.pyplot()
            else:
                # hist / box: delegate to pandas' plot(kind=...).
                data[selected_columns_names].plot(kind=selected_plot)
                st.pyplot()

    # Footer credits (rendered unconditionally).
    html = """
<br><br><br><br><br>
<div>
Desenvolvido pela equipe <a href="mailto:[email protected]">NGI</a>, na
<a href="https://www.qualirede.com.br/">Qualirede</a>.
</div></b>
"""
    st.markdown(html, unsafe_allow_html=True)
def _plot_zone_probabilities(prob, title):
    """Render a grouped bar chart of mode probabilities per zone pair.

    `prob` is indexed as prob[mode][origin][destination] with modes
    Car/Pt/Slow; `title` is the chart title.  (Extracted: this plot was
    duplicated verbatim in exercises (iv) and (v).)
    """
    index = ["Car", "Pt", "Slow"]
    zone11 = [prob[i][0][0] for i in range(3)]
    zone12 = [prob[i][0][1] for i in range(3)]
    zone21 = [prob[i][1][0] for i in range(3)]
    zone22 = [prob[i][1][1] for i in range(3)]
    fig = go.Figure(data=[
        go.Bar(name="Zone 11", x=index, y=zone11),
        go.Bar(name="Zone 12", x=index, y=zone12),
        go.Bar(name="Zone 21", x=index, y=zone21),
        go.Bar(name="Zone 22", x=index, y=zone22)])
    fig.update_layout(barmode='group', title=title, title_font_size=20)
    fig.update_yaxes(range=[0, 1], title_text='Probability')
    st.plotly_chart(fig)


def presentation():
    """Render section 2.2 (Mode Choice Model) with its exercises.

    Fixes over the original: LaTeX-bearing strings are raw strings (the old
    ones triggered invalid-escape SyntaxWarnings for \\e, \\d, \\s, \\m while
    producing the same bytes), user-facing typos are corrected, and the
    duplicated plotting code is factored into _plot_zone_probabilities.
    """
    st.markdown("## 2.2 Mode Choice Model")

    # Show the full probability matrix per mode.
    if st.checkbox("Show Probability Matrices"):
        st.markdown("Probability of choosing mode given origin and destination")
        probabilities = mp.get_probability()
        st.markdown("Probability for **CAR**")
        st.write(pd.DataFrame(probabilities[0],
                              index=["Zone 1", "Zone 2"],
                              columns=["Zone 1", "Zone 2"]))
        st.markdown("Probability for **PUBLIC TRANSPORT (PT)**")
        st.write(pd.DataFrame(probabilities[1],
                              index=["Zone 1", "Zone 2"],
                              columns=["Zone 1", "Zone 2"]))
        st.markdown("Probability for **SLOW**")
        st.write(pd.DataFrame(probabilities[2],
                              index=["Zone 1", "Zone 2"],
                              columns=["Zone 1", "Zone 2"]))

    # ------------------------- Write Up for the section ------------------------- #
    # ---------------------------------------------------------------------------- #
    st.markdown("""
## Mode Choice Model
The model calculates the probability of each alternative mode of transport
given the origin and destination. Hence, we calculate **$Pr(m|i,j)$**, where
$m$ is mode and $i$ and $j$ are the origin and destination zones.
""")
    st.write(r"""
Assuming the utility of choosing the alternative is given by,
$$
U_{j,m}^i = V_{j,m}^i + \epsilon_{j,m}^i
$$
where, $\epsilon_{j,m}^i$ is I.I.D Gumble distributed error term representing
uncertainty.
""")
    st.write(r"""
The probability that the utility for alternative $m$ given $i$ and $j$ is
given by,
$$
Pr(m|i,j) = Pr(U_{j,m}^i > U_{j,m'}^i) =
\dfrac{\exp^{V_{j,m}^i}}{ \sum_{m'={1,2,3}} \exp^{V_{j,m'}^i} }
$$
""")

    # ------------------------------------ EXERCISES ------------------------------#
    # ---------------------------------------------------------------------------- #
    st.markdown("## EXERCISES")

    # ------------------------------------ Ques2 ------------------------------#
    if st.checkbox("(ii) Probabilities of each zone given mode"):
        orig_zone = st.radio("Select Origin Zone", list(ZONE.keys()))
        dest_zone = st.radio("Select Destination Zone", list(ZONE.keys()))
        mode = st.radio("Select Mode", list(MODE.keys()), key=1)
        probability = mp.get_probability(ZONE[orig_zone], ZONE[dest_zone],
                                         MODE[mode])
        st.markdown(
            "The probability of travelling from **{}** to **{}** using a **{}** is,"
            .format(orig_zone, dest_zone, mode))
        st.markdown("$P(m|i,j)$ = **{}**".format(round(probability, 4)))

    # ------------------------------------ Ques3 ------------------------------#
    if st.checkbox(
            "(iii) Probability of travelling using chosen mode from zone 1 to any destination"):
        # Explanation
        st.write(r"""
Using the law of large numbers,
$$
Pr(m|i) = \sum_{j={1,2}} Pr(m|i,j)*P(j)
$$
where, $P(j)$ is 0.5 for each zone and $i = 1$
""")
        # Answer: average over the two (equally likely) destinations.
        mode = st.radio("Select Mode", list(MODE.keys()), key=2)
        zone11 = mp.get_probability(ZONE["Zone 1"], ZONE["Zone 1"], MODE[mode])
        zone12 = mp.get_probability(ZONE["Zone 1"], ZONE["Zone 2"], MODE[mode])
        st.markdown(
            "The probability of travelling from **Zone 1** using **{}** is,"
            .format(mode))
        st.markdown("$P(m|i=1)$ = **{}**".format(
            round(zone11 * 0.5 + zone12 * 0.5, 4)))

    # ------------------------------------ Ques4 ------------------------------#
    if st.checkbox("(iv) Effect on probabilities choosing u_mode"):
        # Explanation
        st.markdown(r"""
Let us assume that the utility function is given by,
$$
U_{j,m}^i = V_{j,m}^i + \mu_{mode}*\epsilon_{j,m}^i
$$
where, $\mu_{mode}$ is a scaling factor for error term
""")
        st.markdown(r"""
As value for $\mu_{mode}$ increases, uncertainty in the model increases.
Hence the probabilities for each mode becomes more and more uncertain.
As value for $\mu_{mode}$ decreases, we are more sure of the deterministic
values. Hence the probabilities for each mode becomes more and more certain.
""")
        # Answer
        u_mode = st.slider("Select u_mode value",
                           min_value=0.1,
                           max_value=10.0,
                           value=1.0,
                           step=0.1)
        _plot_zone_probabilities(
            mp.get_probability(u_mode=u_mode),
            "Mode Choice probabilities for u_mode: {}".format(u_mode))

    # ------------------------------------ Ques5 ------------------------------#
    if st.checkbox("(v) Effect on probabilities changing number of employees"):
        # Explanation
        st.markdown("""
Due to **Equivalent Difference Property**, any change in number of employee
does not affect the probabilities.
""")
        # Answer
        zone1_emp = st.slider("Change for Zone 1 employee",
                              min_value=-5000,
                              max_value=5000,
                              value=0,
                              step=1000)
        zone2_emp = st.slider("Change for Zone 2 employee",
                              min_value=-5000,
                              max_value=5000,
                              value=0,
                              step=1000)
        _plot_zone_probabilities(
            mp.get_probability(emp_change_z1=zone1_emp,
                               emp_change_z2=zone2_emp),
            "Number of Employees - Zone 1: {} Zone 2: {}".format(
                10_000 + zone1_emp, 15_000 + zone2_emp))
@st.cache(persist=True) def load_data(nrows): data = pd.read_csv(DATA_URL, nrows=nrows, parse_dates=[['CRASH_DATE', 'CRASH_TIME']]) data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True) lowercase = lambda x: str(x).lower() data.rename(lowercase, axis="columns", inplace=True) data.rename(columns={"crash_date_crash_time": "date/time"}, inplace=True) return data data = load_data(100000) st.header("Where are the most people injured in NYC?") injured_people = st.slider("Number of persons injured in vehicle collisions", 0, 19) st.map( data.query("injured_persons >= @injured_people")[["latitude", "longitude" ]].dropna(how="any")) st.header("How many collisions occur during a given time of day?") hour = st.slider("Hour to look at", 0, 23) original_data = data data = data[data['date/time'].dt.hour == hour] st.markdown("Vehicle collisions between %i:00 and %i:00" % (hour, (hour + 1) % 24)) midpoint = (np.average(data["latitude"]), np.average(data["longitude"])) st.write( pdk.Deck(
st.pyplot() #Drop the columns selected from the multiselect #Select multiple columns cols_drop = st.multiselect("Select columns to drop", data.columns) #Drop columns data = data.drop(cols_drop, axis=1) #Show the new dataframe #Text to show it's the new dataframe st.text("Once features have been engineered, it looks like this:") #Show the new dataframe st.dataframe(data) #Change the number of clusters you want the data to be broken up into k = st.slider("Select the number of clusters", 2, 10) #A button that clusters when you press it if st.button("Cluster Results"): #Dropping the diagnosis column so that the features can be used to predict it X = data.drop('diagnosis', axis=1).values #Scaling the values of the dataframe X = StandardScaler().fit_transform(X) #Creating a KMeans clusterer called km km = KMeans(n_clusters=k, init="k-means++", n_init=10) #Passing our transformed dataframe into our KMeans clusterer km_pred = km.fit_predict(X) #Plotting the data
########################### #### Cluster 1 - Model #### ########################### st.markdown('---') st.markdown('## **Regional Cluster 1 - Forecast:**') st.markdown("""**Cluster Characteristics:** - Lowest average regional temperature: ($49^oF$) - Low Avg. Consumer Price Index (CPI): $133$ - Highest Sales Cluster - Highest Regional Population """) clust1_model_steps = st.slider('Forecast Range (Weeks):', 1, 51, 12, key='slider1') clust1_model_pred = np.exp( clust1_model.predict(start=cluster_1_test.index[0], end=cluster_1_test.index[0] + clust1_model_steps, exog=test_plus_future_holidays[:clust1_model_steps + 1], dynamic=True, plot_insample=False)) clust_1_model_ci = np.exp(clust1_model.get_forecast(steps=clust1_model_steps+1, exog=test_plus_future_holidays[:clust1_model_steps+1])\ .conf_int(alpha=0.05))
def main():
    """Render the Olist e-commerce EDA page.

    Sections, in order: intro text, raw-dataset previews, common-question
    checkboxes, dataset merge, column describe(), missing-data inspection
    and filling, and interactive plotly visualisations.
    """
    # -----------------------------
    # Initial description
    st.image('./dataset-cover.png', width=900)
    st.title('Analysis of e-commerce dataset')
    st.markdown("""Here, I will introduce some basic descriptive analysis of the Olist brazilian e-commerce dataset. This dataset contains more than 110K orders from 2016 to 2018 with detailed costumer transactions. The dataset consists of **9 files**, which describe orders, products and their categories, user reviews, information about delivery estimate date, payment method, geolocation, and much, much more. \n """)
    st.markdown("""All datasets are available on [Kaggle](https://www.kaggle.com/olistbr/brazilian-ecommerce) .""")
    st.markdown("""Initially, I will start using **4 datasets**: *Items, Orders, Products description and Payment.*""")
    st.markdown("Here, I will cover some basic info about the datasets "
                "(number of orders, how many variables are available, possible"
                " payment methods, etc.) and explore some data visualization"
                " (histogram, bar plot, and boxplot). \n So, let's start!")
    # -----------------------------
    # Import dataset
    st.header("**Dataset investigation**")
    st.markdown("Let's take a look on data...")
    items = pd.read_csv("./olist_dataset/olist_order_items_dataset.csv")
    orders = pd.read_csv("./olist_dataset/olist_orders_dataset.csv")
    products = pd.read_csv("./olist_dataset/olist_products_dataset.csv")
    payment = pd.read_csv("./olist_dataset/olist_order_payments_dataset.csv")
    # Slider for dataframe.head -- shared by all four previews below.
    slider_bar = st.slider(label='Select a number of rows to take a look on datasets?',
                           min_value=1, max_value=10)
    st.markdown('**Items**')
    st.markdown("""This dataset describes the relationship among orders, sellers, order price, shipping cost and date.""")
    st.dataframe(items.head(slider_bar))
    st.markdown('**Orders **')
    st.markdown("""Here, We have info about order status: purchased time, order status, if was already shipped, for example...""")
    st.dataframe(orders.head(slider_bar))
    st.markdown('**Products**')
    st.markdown("""Products dataset describes products by categories, weight, and dimensions""")
    st.dataframe(products.head(slider_bar))
    st.markdown('**Payment**')
    st.markdown("""Last but not least, this dataset shows payment value and method.""")
    st.dataframe(payment.head(slider_bar))
    # Columns description
    add_info = st.checkbox('Want additional info about variables?')
    if add_info:
        st.markdown("""Basically, keep in mind that with these variables bellow, we can track orders along the datasets: \n"""
                    "* order_id: identify products that are in the same basket. \n"
                    "* product_id: identify unique products within the dataset. \n"
                    "* customerid: identify unique customers within the dataset. \n"
                    "* seller_id: identify unique sellers within the dataset. \n ")
    # -----------------------------
    # Dataset shape
    st.header("**Common questions**")
    cols_box = st.checkbox("How many columns and rows are in datasets?")
    if cols_box:
        st.markdown(f"Items: {items.shape}")
        st.markdown(f"Orders: {orders.shape}")
        st.markdown(f"Products: {products.shape}")
        st.markdown(f"Payments: {payment.shape}")
    order_box = st.checkbox("How many orders are in Order dataset?")
    if order_box:
        st.markdown(f"""Total number of orders: {orders['order_id'].nunique()}""")
    order_customers = st.checkbox("How many customers are?")
    if order_customers:
        # NOTE(review): this counts unique order_id, not customers -- the
        # customer count probably needs orders['customer_id'].nunique().
        st.markdown(f"""Total number of customers: {orders['order_id'].nunique()}""")
    products_box = st.checkbox("What are the product categories?")
    if products_box:
        products_categories = products['product_category_name'].unique().tolist()
        st.write(f'There are {len(products_categories)} categories.')
        st.dataframe(products_categories)
    payment_type = st.checkbox("""What are the different payment methods?""")
    if payment_type:
        st.markdown(f"""There are {payment['payment_type'].nunique()} payment options:""")
        st.dataframe(payment['payment_type'].unique())
    # NOTE(review): rebinds order_customers used above -- harmless at runtime
    # (the earlier branch already executed) but confusing to read.
    order_customers = st.checkbox("""What are the possible delievery status on sales orders?""")
    if order_customers:
        order_status = orders['order_status'].unique().tolist()
        st.dataframe(order_status)
    # -----------------------------
    # Merging dataset ---- add code to streamlit
    st.header("**Descriptive analysis**")
    st.markdown("""First, let's merge our datasets:""")
    orders_items = pd.merge(orders, items, on='order_id')
    products_slice = products.drop(['product_name_lenght', 'product_description_lenght'], axis='columns')
    merge_df = pd.merge(orders_items, products_slice, on='product_id')
    merge_df = pd.merge(merge_df, payment, on='order_id')
    # -----------------------------
    # Select columns to .describe()
    st.markdown("""A common task is to extract basic information about the dataset, as the maximum and minimum value per variable, find the mean, quantiles, etc. So, you can choose some columns to investigate it.""")
    # NOTE(review): 'product_length_cm' appears twice in this list.
    cols = ['price', 'freight_value', 'product_weight_g', 'product_length_cm',
            'product_photos_qty', 'product_length_cm', 'product_height_cm',
            'product_width_cm', 'payment_installments', 'payment_value']
    columns_box = st.multiselect("""Select columns to calculate max, min, mean, median and quantiles""", cols)
    if columns_box:
        df_columns_box = merge_df[columns_box]
        st.dataframe(df_columns_box.describe().T)
    # -----------------------------
    # Missing data
    st.markdown("""Another important task is to check types present on the dataset and if exist any missing values. Keep in mind that it is important to handle this effectively, because missing values can impact our interpretation.""")
    missing = pd.DataFrame({'missing count': merge_df.isnull().sum(),
                            'dtype': merge_df.dtypes,
                            'missing %': (merge_df.isnull().sum()/merge_df.shape[0])*100})
    st.dataframe(missing.head(25))
    # Filling missing data
    missing_box = st.checkbox("Do you want to fill missing data?")
    if missing_box:
        st.markdown("""Great, since we have variables with different types, let's focus focus on numeric types""")
        # Filling in numeric columns
        missing_op = st.selectbox('How do you want to fill missing values', ('Mean', '0'))
        if missing_op == '0':
            st.markdown('')
            numeric_cols = ['product_photos_qty', 'product_weight_g', 'product_length_cm',
                            'product_height_cm', 'product_width_cm']
            for col in numeric_cols:
                merge_df[col] = merge_df[col].fillna(value=0)
        if missing_op == 'Mean':
            st.markdown('')
            numeric_cols = ['product_photos_qty', 'product_weight_g', 'product_length_cm',
                            'product_height_cm', 'product_width_cm']
            for col in numeric_cols:
                merge_df[col] = merge_df[col].fillna(value=merge_df[col].mean())
        # Filling in objetc columns
        st.markdown("""Also, Let's handle columns which have object' types:""")
        missing_obj = st.selectbox("""Do you want to drop rows with missing data or ignore it?""", ('Drop', 'Ignore'))
        if missing_obj == 'Drop':
            # Only the product category is actually filled; the date columns
            # below were left commented out by the author.
            st.markdown('Sorry, this feature is under construction :(')
            merge_df['product_category_name'] = merge_df['product_category_name'].fillna(value='no_info')
            # object_cols = ['order_approved_at', 'order_delivered_carrier_date', 'order_delivered_customer_date']
            # merge_df = merge_df[merge_df[object_cols].notna()]
        if missing_obj == 'Ignore':
            st.markdown('')
    # Recheck missing data
    missing_box2 = st.checkbox("Want to check the result?")
    if missing_box2:
        missing2 = pd.DataFrame({'missing count': merge_df.isnull().sum(),
                                 'dtype': merge_df.dtypes,
                                 'missing %': (merge_df.isnull().sum()/merge_df.shape[0])*100})
        st.dataframe(missing2.head(25))
    # -----------------------------
    # Data visualization
    st.header("""**Visualization**""")
    st.markdown("""Data visualization it's an important task on data analysis, which allows extracting interesting patterns from data and, making it easier to understand. So, Let's start plotting""")
    # Histogram - columns
    st.subheader("Histogram")
    numeric_cols = ['price', 'product_photos_qty', 'product_weight_g', 'freight_value',
                    'product_length_cm', 'product_height_cm', 'product_width_cm',
                    'payment_value', 'payment_installments']
    hist_col = st.selectbox('What column do you want to create a histogram?:', numeric_cols)
    if hist_col:
        fig_hist = px.histogram(merge_df, x=hist_col)
        st.write(fig_hist)
    # Boxplot - columns
    st.subheader("Boxplot")
    boxplot_col = st.multiselect("""What column do you want to create boxplot?""", numeric_cols)
    if boxplot_col:
        fig_boxplot = px.box(merge_df, x=boxplot_col)
        st.write(fig_boxplot)
    # Barplot - Products Ordered
    st.subheader("How many products people generally order?")
    number_orders = merge_df.groupby('order_id')['order_item_id'].aggregate('sum').reset_index()
    number_orders = number_orders['order_item_id'].value_counts()
    # Shifts the value_counts index by one -- presumably so basket sizes are
    # labelled starting at 1 on the plot; verify intent.
    number_orders.index += 1
    fig_bar = px.bar(number_orders, x=number_orders.index, y=number_orders.values)
    st.write(fig_bar)
    # Barplot - Most bought products
    st.subheader("**Which categories people buy at most?**")
    categories_prods = merge_df.groupby('product_category_name').count().reset_index().sort_values('order_id')
    fig_bar_p = px.bar(categories_prods, y='product_category_name', x='order_id', orientation='h')
    st.write(fig_bar_p)
    # Money spent
    # Barplot - Payment methods
    st.subheader("What is the most common payment method?")
    pay_type = merge_df.groupby('payment_type')['order_id'].count().reset_index()
    pay_type = pay_type.sort_values(by='order_id', ascending=False)
    pay_type = pay_type.rename(columns={'order_id': 'value_count'})
    fig_pay = px.bar(pay_type, y='value_count', x='payment_type', orientation='v')
    st.write(fig_pay)
    # -----------------------------
    # The end!
    st.header("That's all folks!")
    # st.balloons()
    st.markdown("""This work was developed using these excellent Kaggle repositories [A] (https://www.kaggle.com/gsdeepakkumar/e-commerce-dataset-analysis/notebook), [B] (https://www.kaggle.com/kabure/simple-eda-sales-and-customer-patterns/notebook). \n So, if you want to dive in this dataset, you totally should check them.""")
    st.markdown("""Thank you so much for checking my job! If you liked, please, check my [github] (https://github.com/cavalcante-l?tab=repositories) and my [linkedin] (https://www.linkedin.com/in/laizacavalcante/). """)
    st.markdown("Developed by Laíza Cavalcante.")
def write():
    """Render the SugarTime model-performance page.

    Shows explanatory text, a slider to pick a forecast start time inside
    the held-out test set, a plot of the test set around that time, and a
    plot of the one-hour forecast against the actual glucose values.
    """
    st.markdown(""" # SugarTime ### Model Performance This page lets you visualize how the model performs on data that it hasn't seen yet. """)
    with st.beta_expander("CLICK HERE to expand discussion"):
        st.markdown(""" The dataset is split into two sets: a training set and a testing set. The model has been trained on the training set, and we can use the model to perform inference on data from the testing set here. The time series model is auto-regressive with exogenous variables (ARX). The base algorithm used in such a model can be any regression algorithm; here I currently use a support vector machine. The full model actually consists of several models, each individually fit to a different lag of the target variable. In other words, there is one model fit to the glucose data at time *t+1*, another fit to the glucose data at time *t+2*, another at *t+3*, etc., all the way up to the selected horizon of the model (which defaults to 12 steps of 5 minutes each, i.e., one hour). Each model represents the best performing model after optimizing the time-series design hyperparameters (e.g., order of the *endogenous* or *target* variable, order of the *exogenous* variables, and/or delay of the exogenous variables) at that time step. Note that this model has essentially learned to revert to the mean. Since there is considerable autocorrelation in data from continuous glucose monitors, inference becomes less acurrate as the inference step gets farther away from the current time *t*. Here, instead of relying on the exogenous variables (i.e., carbohydrates and insulin), the model does a better job by increasingly bringing the predicted value back to the mean, which for this patient is a blood glucose level of approximately 100 mg/dL. This is obviously not what we want the model to learn. But I have yet to find an estimator/algorithm that doesn't converge on this strategy to some extent, which suggests that these two exogenous variables are simply not predictive enough to account for significant variance beyond the autoregressive component of this model. """)
    st.markdown(""" *Instructions:* Use the slider to select a time within the test set. The model will use the data up to that point to generate a forecast for the next hour. *** """)
    st.markdown("# Select date/time to show forecast.")

    # load patient data and fit model
    vm = load_saved_model()
    patient = vm.patient

    # make datetime selection slider; the 40-sample margins keep the
    # forecast and plotting windows inside the test set
    x_index = patient.Xtest.index
    start_time = st.slider(
        "Move the slider to select the forecast date/time",
        min_value=x_index[40].to_pydatetime(),
        max_value=x_index[-40].to_pydatetime(),
        value=x_index[45].to_pydatetime(),
        step=timedelta(minutes=60),
        format="MM/DD/YY - hh:mm",
    )

    # plot glucose values for the test set
    fig = plot_test_set(patient, start_time)
    st.plotly_chart(fig)

    # plot performance of model
    st.markdown("# Show forecast vs actual")
    start_time_index = (x_index == pd.Timestamp(start_time)).argmax()
    nsteps = vm.horizon
    ypred = vm.multioutput_forecast(patient.Xtest[:start_time_index],
                                    patient.ytest[:start_time_index])
    # one forecast point every 5 minutes, starting at the selected time
    idx = pd.date_range(
        start=start_time,
        end=start_time + timedelta(minutes=5 * (len(ypred) - 1)),
        freq="5T",
    )
    ypred = pd.DataFrame(ypred, columns=["ypred"], index=idx)
    fig = core.plot_forecast(
        patient.ytest[(start_time_index - 40):(start_time_index + nsteps)],
        ypred,
        return_flag=True,
    )
    # BUG FIX: the format string was "%m/%d/%m %H:%M", which printed the
    # month twice and never the year; "%m/%d/%y" matches the slider's
    # "MM/DD/YY" display format.
    start_time_text = datetime.datetime.strftime(start_time, "%m/%d/%y %H:%M")
    fig.update_layout(
        title={
            "text": "start time: " + start_time_text,
            "y": 0.88,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        })
    st.plotly_chart(fig)
def show_results():
    """Display the results page: overall error-metric graphs, the Random
    Forest performance table, and citation-frequency histograms for the
    Random Forest and benchmark models."""
    st.write(results_text)

    # Overall metric graphs (MAE, RMSE, R Square), rendered in order.
    for rel_path, fig_caption in (
            ('Results/image/mae.png', 'Figure 1: Mean Absolute Error'),
            ('Results/image/rmse.png', 'Figure 2: Root Mean Square Error'),
            ('Results/image/r_square.png', 'Figure 3: R Square')):
        st.image(Image.open(str(main_path / rel_path)), caption=fig_caption)

    # Random Forest histogram for a user-selected number of years.
    st.write("Random Forest model performance:")
    rf_year = st.slider('Number of years after publication?', 1, 10)
    rf_histogram = Image.open(str(main_path / ('Results/image/' + rf_images[rf_year])))
    st.image(rf_histogram,
             caption='Figure 4: Random Forest Citation Frequency Histogram',
             width=600)

    # Random Forest performance table (rows: metrics, columns: horizons).
    st.write(
        "The following are the performance matrix for Random Forest Regression model:"
    )
    metric_values = np.array(
        [[1.168999, 1.89990, 2.66408, 3.43690, 4.19492, 5.62216, 6.94813],
         [43.38860, 71.76869, 124.98958, 216.97061, 356.03481, 86.33918, 1421.48745],
         [6.58700, 8.47164, 11.17987, 14.72992, 18.86888, 28.04174, 37.70262],
         [0.16119, 0.22493, 0.27972, 0.303861, 0.30706, 0.29510, 0.22525]])
    rf_table = pd.DataFrame(metric_values,
                            columns=[
                                '1 year', '2 year', '3 year', '4 year',
                                '5 year', '7 year', '10 year'
                            ])
    rf_table['index'] = ["MAE", "MSE", "RMSE", "R Square"]
    st.table(rf_table.set_index("index"))

    # Benchmark histograms: one column per model, sharing one year slider.
    st.write("Benchmark model performance:")
    bench_year = st.slider('Number of years after publication?', 0, 10)
    benchmark_specs = (
        ('Results/image/lr/', lr_images,
         'Figure 5: LR Citation Frequency Histogram'),
        ('Results/image/svm/', svm_images,
         'Figure 6: SVM Citation Frequency Histogram'),
        ('Results/image/kmeans/', km_images,
         'Figure 7: K-Means Citation Frequency Histogram'),
    )
    for column, (folder, images, fig_caption) in zip(st.beta_columns(3),
                                                     benchmark_specs):
        with column:
            st.image(Image.open(str(main_path / (folder + images[bench_year]))),
                     caption=fig_caption)
def in_bounding_box(point): lng, lat = point in_lng_bounds = DOWNTOWN_BOUNDING_BOX[0] <= lng <= DOWNTOWN_BOUNDING_BOX[2] in_lat_bounds = DOWNTOWN_BOUNDING_BOX[1] <= lat <= DOWNTOWN_BOUNDING_BOX[3] return in_lng_bounds and in_lat_bounds df = pd.read_csv(DATA_URL) # Filter to bounding box df = df[df['LAT_DESTINO'] < -33] st.write(df) df = df[df['NUM_EST'] < 23] num_est = st.slider("Cantidad de estudiantes traspasados entre colegios", 1, 30) GREEN_RGB = [0, 255, 0, 40] RED_RGB = [240, 100, 0, 40] arc_layer = pydeck.Layer( "ArcLayer", data=df.query("NUM_EST >= @num_est"), get_width="NUM_EST * 2", get_source_position=["LON_ORIGEN", "LAT_ORIGEN"], get_target_position=["LON_DESTINO", "LAT_DESTINO"], get_tilt=15, get_source_color=RED_RGB, get_target_color=GREEN_RGB, pickable=True, auto_highlight=True, )