def test_stop(self):
    with pytest.raises(StopException) as exc_message:
        st.stop()
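# A minimal usage sketch (not part of the test suite) of the behaviour exercised above:
# st.stop() halts the script at the call site, so nothing after it renders on that rerun.
# The widget label and follow-up message are illustrative, not taken from the source.
import streamlit as st

name = st.text_input("Name")
if not name:
    st.warning("Please enter a name.")
    st.stop()  # execution ends here until the user supplies input
st.success(f"Hello, {name}!")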
def cs_body():
    # Magic commands
    col1, col2, col3 = st.beta_columns(3)

    col1.subheader('Magic commands')
    col1.code('''# Magic commands implicitly `st.write()`
\'\'\' _This_ is some __Markdown__ \'\'\'
a=3
'dataframe:', data
''')

    # Display text
    col1.subheader('Display text')
    col1.code('''
st.text('Fixed width text')
st.markdown('_Markdown_') # see *
st.latex(r\'\'\' e^{i\pi} + 1 = 0 \'\'\')
st.write('Most objects') # df, err, func, keras!
st.write(['st', 'is <', 3]) # see *
st.title('My title')
st.header('My header')
st.subheader('My sub')
st.code('for i in range(8): foo()')
* optional kwarg unsafe_allow_html = True
''')

    # Display data
    col1.subheader('Display data')
    col1.code('''
st.dataframe(my_dataframe)
st.table(data.iloc[0:10])
st.json({'foo':'bar','fu':'ba'})
''')

    # Display charts
    col1.subheader('Display charts')
    col1.code('''
st.line_chart(data)
st.area_chart(data)
st.bar_chart(data)
st.pyplot(fig)
st.altair_chart(data)
st.vega_lite_chart(data)
st.plotly_chart(data)
st.bokeh_chart(data)
st.pydeck_chart(data)
st.deck_gl_chart(data)
st.graphviz_chart(data)
st.map(data)
''')

    # Display media
    col1.subheader('Display media')
    col1.code('''
st.image('./header.png')
st.audio(data)
st.video(data)
''')

    # Display interactive widgets
    col2.subheader('Display interactive widgets')
    col2.code('''
st.button('Hit me')
st.checkbox('Check me out')
st.radio('Radio', [1,2,3])
st.selectbox('Select', [1,2,3])
st.multiselect('Multiselect', [1,2,3])
st.slider('Slide me', min_value=0, max_value=10)
st.select_slider('Slide to select', options=[1,'2'])
st.text_input('Enter some text')
st.number_input('Enter a number')
st.text_area('Area for textual entry')
st.date_input('Date input')
st.time_input('Time entry')
st.file_uploader('File uploader')
st.beta_color_picker('Pick a color')
''')
    col2.write('Use widgets\' returned values in variables:')
    col2.code('''
>>> for i in range(int(st.number_input('Num:'))): foo()
>>> if st.sidebar.selectbox('I:',['f']) == 'f': b()
>>> my_slider_val = st.slider('Quinn Mallory', 1, 88)
>>> st.write(slider_val)
''')

    # Control flow
    col2.subheader('Control flow')
    col2.code('''
st.stop()
''')

    # Lay out your app
    col2.subheader('Lay out your app')
    col2.code('''
st.beta_container()
st.beta_columns(spec)
>>> col1, col2 = st.beta_columns(2)
>>> col1.subheader('Columnisation')
st.beta_expander('Expander')
>>> with st.beta_expander('Expand'):
>>>     st.write('Juicy deets')
''')

    # Display code
    col2.subheader('Display code')
    col2.code('''
st.echo()
>>> with st.echo():
>>>     st.write('Code will be executed and printed')
''')

    # Display progress and status
    col3.subheader('Display progress and status')
    col3.code('''
st.progress(progress_variable_1_to_100)
st.spinner()
>>> with st.spinner(text='In progress'):
>>>     time.sleep(5)
>>>     st.success('Done')
st.balloons()
st.error('Error message')
st.warning('Warning message')
st.info('Info message')
st.success('Success message')
st.exception(e)
''')

    # Placeholders, help, and options
    col3.subheader('Placeholders, help, and options')
    col3.code('''
st.empty()
>>> my_placeholder = st.empty()
>>> my_placeholder.text('Replaced!')
st.help(pandas.DataFrame)
st.get_option(key)
st.set_option(key, value)
st.beta_set_page_config(layout='wide')
''')

    # Mutate data
    col3.subheader('Mutate data')
    col3.code('''
DeltaGenerator.add_rows(data)
>>> my_table = st.table(df1)
>>> my_table.add_rows(df2)
>>> my_chart = st.line_chart(df1)
>>> my_chart.add_rows(df2)
''')

    # Optimize performance
    col3.subheader('Optimize performance')
    col3.code('''
@st.cache
>>> @st.cache
... def foo(bar):
...     # Mutate bar
...     return data
>>> # Executes d1 as first time
>>> d1 = foo(ref1)
>>> # Does not execute d1; returns cached value, d1==d2
>>> d2 = foo(ref1)
>>> # Different arg, so function d1 executes
>>> d3 = foo(ref2)
''')

    return None
def main():
    # Title
    st.title("Model Experimentation with MLflow")

    # Choose dataset
    df_raw = load_data()
    st.write(df_raw.head())

    st.header("Dendrogram")
    corr = np.round(spearmanr(df_raw.drop('label', axis=1)).correlation, 4)
    corr_condensed = hc.distance.squareform(1 - corr)
    z = hc.linkage(corr_condensed, method='average')
    fig_den = plt.figure(figsize=(16, 10))
    dendrogram = hc.dendrogram(z, labels=df_raw.drop('label', axis=1).columns,
                               orientation='left', leaf_font_size=16)
    st.pyplot(fig_den, clear_figure=True)

    st.header("Correlation Matrix")
    fig_cor = plt.figure(figsize=(16, 10))
    sns.heatmap(df_raw.corr())
    st.pyplot(fig_cor, clear_figure=True)

    st.header("Boxplots")
    fig_box1 = plt.figure(figsize=(20, 5))
    sns.boxplot(data=df_raw)
    st.pyplot(fig_box1, clear_figure=True)

    q = st.slider("", 0.9, 1.0, 0.99, 0.01)
    df_raw = filter_df(df_raw, q)
    fig_box2 = plt.figure(figsize=(20, 5))
    sns.boxplot(data=df_raw)
    st.pyplot(fig_box2, clear_figure=True)

    # Model selection
    models = {
        'Logistic Regression': LogisticRegression(max_iter=2000, n_jobs=4, random_state=42),
        'Random Forest': RandomForestClassifier(n_jobs=4, random_state=42),
        'SVC': SVC(random_state=42),
        'KNNeighbors': KNeighborsClassifier(n_jobs=4)
    }

    # Feature selection
    feature_options = df_raw.columns.drop('label').tolist()
    feature_choice = st.multiselect("Choose features to drop", feature_options)
    treatment_options = {
        'None': no_op,
        'StandardScaler': scal_features,
        'PCA': pca_features
    }
    treatment_choice = st.selectbox("Choose feature treatment", list(treatment_options.keys()))

    clear_mlflow = st.checkbox("Clear mlflow experiments?")
    clear_mlflow = st.button("Clear MLFlow")
    if clear_mlflow:
        exp = mlflow.get_experiment_by_name('model_selection')
        if exp != None and exp.lifecycle_stage != 'deleted':
            st.write('Previous experiment exists')
            mlflow.delete_experiment(exp.experiment_id)
            st.write(f'Archiving experiment with id {exp.experiment_id}')
            subprocess.run([f'ls -la mlruns/.trash/{exp.experiment_id}'], shell=True, check=True)
            try:
                subprocess.run('rm -rf mlruns/.trash/*', shell=True, check=True)
            except subprocess.CalledProcessError as e:
                st.write(e)
                exit(-1)
            experiment_id = mlflow.create_experiment('model_selection')
        else:
            experiment_id = mlflow.set_experiment('model_selection')

    # Mlflow tracking
    track_with_mlflow = st.checkbox("Track with mlflow?")

    # Model training
    start_training = st.button("Start training")
    if not start_training:
        st.stop()

    y = df_raw['label'].copy()
    sub_df = df_raw.drop([*feature_choice, 'label'], axis=1)
    X = sub_df.copy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    res = pd.DataFrame({'model': [], 'f1': []})
    sc = make_scorer(f1_score, pos_label='AF')

    if track_with_mlflow and clear_mlflow:
        mlflow.end_run()
        exp = mlflow.get_experiment_by_name('model_selection')
        if exp != None and exp.lifecycle_stage != 'deleted':
            st.write('Previous experiment exists')
            mlflow.delete_experiment(exp.experiment_id)
            st.write(f'Archiving experiment with id {exp.experiment_id}')
            subprocess.run([f'ls -la mlruns/.trash/{exp.experiment_id}'], shell=True, check=True)
            try:
                subprocess.run('rm -rf mlruns/.trash/*', shell=True, check=True)
            except subprocess.CalledProcessError as e:
                st.write(e)
                exit(-1)
            experiment_id = mlflow.create_experiment('model_selection')
        else:
            experiment_id = mlflow.set_experiment('model_selection')

    for name, model in models.items():
        if track_with_mlflow:
            # mlflow.set_experiment(experiment_id)
            mlflow.start_run()
            mlflow.log_param('features', list(X.columns))
            mlflow.log_param('model', name)
        X_train, X_test = treatment_options[treatment_choice](X_train, X_test)
        st.write(f'Training {name}')
        scores = cross_val_score(model, X_train, y_train, cv=4, scoring=sc, n_jobs=4)
        model.fit(X_train, y_train)

        # Model evaluation
        preds_test = model.predict(X_test)
        metric_name = "f1_score"
        metric_test = f1_score(y_test, preds_test, pos_label='AF')
        # st.write(f"{metric_name}_train", round(metric_train, 3))
        # st.write(f"{metric_name}_test", round(metric_test, 3))
        res = res.append({'model': f"{name}", 'f1': scores.mean()}, ignore_index=True)
        if track_with_mlflow:
            mlflow.log_metric(metric_name + "_test", scores.mean())
            tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
            # Model registry does not work with file store
            if tracking_url_type_store != "file":
                mlflow.sklearn.log_model(model, "model", registered_model_name="AF_Classifier")
            else:
                mlflow.sklearn.log_model(model, "model")
            mlflow.end_run()

    st.write(res.sort_values('f1', ascending=False))
def handle_edge_url(url_params: dict, pathSession):
    '''Display tables associated with a link'''
    namespace = url_params.get('namespace', [""])[0]
    hostname = url_params.get('hostname', [""])[0]
    nhip = url_params.get('nhip', [""])[0]
    ipLookup = url_params.get('ipLookup', [""])[0]
    vtepLookup = url_params.get('vtepLookup', [""])[0]
    vrf = url_params.get('vrf', [""])[0]
    ifhost = url_params.get('ifhost', [""])[0]
    macaddr = url_params.get('macaddr', [""])[0]
    oif = url_params.get('oif', [""])[0]

    if not hostname:
        st.error('No hostname found to display information for')
        st.stop()

    st.header(f'Debug Tables for Path from {pathSession.source} to '
              f'{pathSession.dest}')
    hoptype = 'Bridged' if macaddr else 'Routed'
    st.subheader(f'{hoptype} hop between {hostname} and {ifhost}')

    pathobj = getattr(pathSession, 'pathobj', None)
    engobj = pathobj.engine_obj

    if ipLookup:
        if not vtepLookup or (ipLookup != vtepLookup):
            st.info(f'Route Lookup on {hostname}')
            st.dataframe(data=engobj._rdf.query(
                f'hostname=="{hostname}" and vrf=="{vrf}"'))

        if vtepLookup:
            st.info(f'Underlay Lookup on {hostname} for {vtepLookup}')
            vtepdf = engobj._underlay_dfs.get(vtepLookup, pd.DataFrame())
            if not vtepdf.empty:
                st.dataframe(data=vtepdf.query(
                    f'hostname=="{hostname}" and vrf=="default"'))

    if nhip:
        st.info(f'ARP/ND Table on {hostname} for nexthop {nhip}, oif={oif}')
        arpdf = engobj._arpnd_df.query(f'hostname=="{hostname}" and '
                                       f'ipAddress=="{nhip}" and '
                                       f'oif=="{oif}"')
        st.dataframe(data=arpdf)

        if not arpdf.empty:
            if ':' in nhip:
                dropcol = ['ipAddressList']
            else:
                dropcol = ['ip6AddressList']
            nhmac = arpdf.macaddr.iloc[0]
            if nhmac:
                if_df = engobj._if_df.query(f'macaddr=="{nhmac}" and '
                                            f'hostname=="{ifhost}"') \
                    .drop(columns=dropcol)
                label = (f'matching nexthop {nhip}, macaddr {nhmac} on '
                         f'host {ifhost}')
            else:
                label = f'matching nexthop {nhip} on host {ifhost}'
                if_df = engobj._if_df.query(f'hostname=="{ifhost}"') \
                    .drop(columns=dropcol)

            if nhip != '169.254.0.1':
                st.info(f'Interfaces {label}')
                s = if_df.ipAddressList.str \
                    .startswith(f'{nhip}/') \
                    .dropna()
                s = s.loc[s == True]
                st.dataframe(data=engobj._if_df.iloc[s.loc[s == True].index])
            else:
                st.info(f'Interfaces {label}')
                st.dataframe(data=if_df)

    if macaddr:
        with st.beta_expander(f'MAC Table for {hostname}, MAC addr {macaddr}',
                              expanded=True):
            st.dataframe(data=pathobj.engine_obj._macsobj.get(
                namespace=namespace, hostname=hostname, macaddr=macaddr))
def main():
    logging.info("Main script is refreshed...")

    # Custom functionality for ensuring changing widgets do not cause previous sections to reset state
    state = get_state()

    st.title("What Makes a Playlist Successful?")
    st.write(
        "**This application trains & evaluates playlist success classification models, "
        "and generates SHAP visualizations for analyzing feature importance**")
    st.write(
        "[Created By: Alexander Wong](https://www.linkedin.com/in/alexrobwong/)",
        unsafe_allow_html=True,
    )
    if st.checkbox("Click to watch recorded demo"):
        st.video("https://www.youtube.com/watch?v=dPsGxb9lTUY")

    # Sidebar Inputs ---------------------------------------------------------------------------------------------
    experiment_name_input = st.sidebar.text_input("Experiment name:")
    experiment_name = f"{experiment_name_input}_{str(datetime.now())}"

    genre_options = GENRES
    default_ix = GENRES.index("Dance & House")
    selected_genre = st.sidebar.selectbox("Select genre:",
                                          options=genre_options,
                                          index=default_ix)
    # selected genre must be a list
    genre = [selected_genre]

    users_threshold = st.sidebar.number_input(
        "Minimum monthly number of Users:",
        min_value=10,
    )
    success_threshold = (st.sidebar.slider(
        "Streaming-ratio success threshold (%):",
        min_value=1,
        max_value=99,
        value=70,
    ) / 100)
    holdout_fraction = (st.sidebar.slider(
        "Test Size (%):", min_value=1, max_value=30, value=5) / 100)

    model_map = {
        "Extreme Gradient Boosting": "xgboost",
        "Decision Tree Classifier": "dt",
        "Extra Trees Classifier": "et",
        "Light Gradient Boosting Machine": "lightgbm",
        "Random Forest Classifier": "rf",
    }
    model_selection = list(
        st.sidebar.multiselect("Models to train:",
                               options=list(model_map.keys())))

    optionals = st.sidebar.beta_expander(
        "Additional Feature Engineering Parameters", False)
    polynomials_box = optionals.checkbox("Feature Polynomials")
    interactions_box = optionals.checkbox("Feature Interactions")
    ratios_box = optionals.checkbox("Feature Ratios")

    if polynomials_box:
        polynomials = True
    else:
        polynomials = False

    if interactions_box:
        interactions = True
    else:
        interactions = False

    if ratios_box:
        ratios = True
    else:
        ratios = False

    # Experiment & Model Training --------------------------------------------------------------------------------
    train = st.checkbox("Click to train models")
    if train:
        # Application can only be run start to finish if xgboost is selected...add it to the list of options
        exb_added = False
        if "Extreme Gradient Boosting" not in model_selection:
            model_selection.append("Extreme Gradient Boosting")
            exb_added = True

        # Bugfix - must select at least two models to train, otherwise a model object is used instead of an index
        lgb_added = False
        if "Light Gradient Boosting Machine" not in model_selection:
            model_selection.append("Light Gradient Boosting Machine")
            lgb_added = True

        include_models = [model_map[x] for x in list(model_selection)]

        # Check that models are selected - if none are selected, all models will be trained (undesired app behavior)
        if len(include_models) == 0 or include_models is None:
            raise Exception("No models were selected. Please re-start the application")

        base_frame = pd.read_parquet("data/streamlit_data.parquet")
        state.genre_frame = base_frame.loc[lambda f: f["genre_1"].isin(genre)]
        labelled_frame = classify_success(state.genre_frame, users_threshold,
                                          success_threshold)
        train_frame, holdout_frame = create_holdout(
            labelled_frame, holdout_fraction=holdout_fraction)

        # PyCaret setup to train models
        if not state.experiment_complete:
            with st.spinner("Model Training in Progress"):
                if exb_added:
                    st.success(
                        "**Extreme Gradient Boosting Model** automatically added by default into model pipeline"
                    )
                if lgb_added:
                    st.success(
                        "**Light Gradient Boosting Machine Model** automatically added by default into model pipeline"
                    )
                setup(
                    data=train_frame,
                    numeric_features=MODEL_NUMERICAL_FEATURES,
                    categorical_features=MODEL_CATEGORICAL_FEATURES,
                    target="success_streaming_ratio_users",
                    ignore_features=["playlist_uri"],
                    test_data=holdout_frame,
                    session_id=123,
                    ignore_low_variance=True,
                    remove_outliers=True,
                    fix_imbalance=True,
                    remove_multicollinearity=True,
                    log_experiment=True,
                    log_data=True,
                    fold=2,
                    n_jobs=-1,
                    combine_rare_levels=True,
                    experiment_name=experiment_name,
                    silent=True,
                    feature_interaction=interactions,
                    feature_ratio=ratios,
                    polynomial_features=polynomials,
                )
                state.list_models = compare_models(n_select=5,
                                                   round=3,
                                                   cross_validation=False,
                                                   include=include_models)
                state.experiment_complete = True

                state.X_train = get_config(variable="X_train")
                state.y_train = get_config(variable="y_train")
                state.view = pd.merge(state.y_train,
                                      state.X_train,
                                      left_index=True,
                                      right_index=True).reset_index(drop=True)

        # Display model training results
        st.header("Model Training & Testing Results")
        exp = pull()
        st.dataframe(exp)
        st.info("**Models were trained using default parameters**")
        st.info(
            "To improve individual model performance, "
            "please consider offline **hyperparameter tuning** techniques such as **Grid Search**. "
            "To improve overall performance, please consider advanced offline **ensembling** techniques "
            "such as **Bagging**, **Boosting**, **Stacking**")

        # Model Definitions
        models_expander = st.beta_expander("Model Definitions")
        models_expander.write(
            "[**Decision Tree Classifier**](https://en.wikipedia.org/wiki/Decision_tree_learning)"
        )
        models_expander.write(
            "A Decision Tree is a simple representation for "
            "classifying examples, a form of Supervised Machine Learning where the data is "
            "continuously split according to a certain parameter. A decision tree starts with a "
            "single node, which branches into possible outcomes. Each of those outcomes "
            "leads to additional nodes, which branch off into other possibilities")
        models_expander.write("")
        models_expander.write(
            "[**Random Forest Classifier**](https://en.wikipedia.org/wiki/Random_forest)"
        )
        models_expander.write(
            "An ensemble learning method "
            "that operates by constructing a multitude of decision trees at training time, "
            "where each tree is trained on a bootstrap replica of the training data and the final "
            "model classification is decided via majority vote from the constituent trees")
        models_expander.write("")
        models_expander.write(
            "[**Extra Trees Classifier**](https://quantdare.com/what-is-the-difference-between"
            "-extra-trees-and-random-forest/)")
        models_expander.write(
            "Extremely randomized trees is similar to Random Forest, "
            "in that it builds multiple trees and splits nodes using random subsets of features, "
            "but with two key differences: it does not bootstrap observations (meaning it samples "
            "without replacement), and nodes are split on random splits, not best splits")
        models_expander.write("")
        models_expander.write(
            "[**Extreme Gradient Boosting**](https://en.wikipedia.org/wiki/Gradient_boosting)"
        )
        models_expander.write(
            "Boosting is a technique which combines learning algorithms "
            "in series to achieve a strong learner from many sequentially connected "
            "weak learners. In the case of the gradient boosted decision trees algorithm, "
            "the weak learners are decision trees, where each tree attempts to minimize the errors "
            "of the previous tree. Trees in boosting are weak learners, but adding many trees in series, "
            "each focusing on the errors of the previous one, makes boosting a "
            "highly efficient and accurate model")
        models_expander.write("")
        models_expander.write(
            "[**Light Gradient Boosting Machine**](https://lightgbm.readthedocs.io/en/latest/)"
        )
        models_expander.write(
            "A gradient boosting framework for machine "
            "learning originally developed by Microsoft. Similar to Extreme Gradient Boosting, "
            "it is based on decision tree algorithms, however unlike Extreme Gradient Boosting, "
            "the algorithm splits the tree leaf-wise instead of level-wise")
        models_expander.write("")

        # Model Evaluation Metrics
        metrics_expander = st.beta_expander("Model Evaluation Metrics")
        metrics_expander.write("**Accuracy**")
        metrics_expander.write(
            "Accuracy is defined as the percentage of correct predictions for the test data."
            " It can be calculated easily by dividing the number of correct predictions by the "
            "number of total predictions.")
        metrics_expander.write("")
        metrics_expander.write("**AUC**")
        metrics_expander.write(
            "An ROC curve (receiver operating characteristic curve) is a graph showing the "
            "performance of a classification model at all classification thresholds. This curve "
            "plots the True Positive Rate (TPR) against the False Positive Rate (FPR)")
        metrics_expander.write("")
        metrics_expander.write("**Recall**")
        metrics_expander.write(
            "Recall is defined as the fraction of examples which were predicted to belong "
            "to a class with respect to all of the examples that truly belong in the class.")
        metrics_expander.write("")
        metrics_expander.write("**Precision**")
        metrics_expander.write(
            "Precision is defined as the fraction of relevant examples (true positives) among "
            "all of the examples which were predicted to belong in a certain class.")
        metrics_expander.write("")
        metrics_expander.write("**F1**")
        metrics_expander.write(
            "The traditional F-measure or balanced F-score (F1 score) is the harmonic mean "
            "of precision and recall and is calculated as --> F1 score = 2 * (Precision * Recall) / "
            "(Precision + Recall)")
        metrics_expander.write("")
        metrics_expander.write("**Kappa**")
        metrics_expander.write(
            "The Kappa statistic (or value) is a metric that compares an Observed Accuracy with "
            "an Expected Accuracy (random chance). The kappa statistic is used not only to evaluate "
            "a single classifier, but also to evaluate classifiers amongst themselves. In addition, "
            "it takes into account random chance (agreement with a random classifier), which"
            " generally means it is less misleading than simply using accuracy as a metric "
            "(an Observed Accuracy of 80% is a lot less impressive with an Expected Accuracy of "
            "75% versus an Expected Accuracy of 50%)")
        metrics_expander.write("")
        metrics_expander.write("**MCC**")
        metrics_expander.write(
            "Unlike the other metrics discussed above, MCC takes all the cells of the Confusion"
            " Matrix into consideration in its formula --> MCC = (TP * TN - FP * FN) / sqrt((TP + FP) * "
            "(TP + FN) * (TN + FP) * (TN + FN)). Similar to the Correlation Coefficient, the range of "
            "values of MCC lies between -1 and +1. A model with a score of +1 is a perfect model "
            "and -1 is a poor model. This property is one of the key advantages of MCC as it"
            " leads to easy interpretability.")
        metrics_expander.write("")

        # Additional model data
        opts = st.beta_expander("Additional Model Data", False)

        # Download the training data as an excel file
        if opts.button("Display Link to Download Model Training Data"):
            st.markdown(get_table_download_link(state.view),
                        unsafe_allow_html=True)

        # Prompt to launch MLFlow
        if opts.button("Display Link to Spotify Model Training History"):
            st.info(
                "Note that this application uses MLFlow only when both the application and MLFlow are "
                "deployed locally")

        # Overall importance ---------------------------------------------------------------------------------------
        st.write("")  # Intentional extra blank spaces
        st.write("")
        st.header(f"Success Drivers for {selected_genre} Playlists")

        dict_models = {}
        for i, model in enumerate(exp.index):
            dict_models[model] = i

        user_selected_model = st.selectbox(
            "Select model to view feature importance:", exp.index)
        state.importance = st.checkbox("Click to calculate feature importance")

        if state.importance and state.experiment_complete:
            state.new_selected_model = state.list_models[
                dict_models[user_selected_model]]
            st.write("**Model parameters: **")
            st.write(state.new_selected_model)
            st.write("")
            st.write("**Generating Visualizations...**")
            bar = st.progress(0)

            if state.selected_model != state.new_selected_model:
                state.selected_model = state.new_selected_model
                state.explainer = shap.TreeExplainer(state.selected_model)
                state.shap_values = state.explainer.shap_values(
                    state.X_train.to_numpy())
            bar.progress(25)

            # Overall Feature Importance -------------------------------------------------------------
            st.subheader("Success Drivers - Average")
            st.pyplot(
                shap.summary_plot(state.shap_values,
                                  state.X_train,
                                  plot_type="bar"))

            # Violin plot and waterfall plot only available at this time for XGBoost model
            if user_selected_model != "xgboost":
                st.warning(
                    "This PoC has only been configured for when **Extreme Gradient Boosting "
                    "(xgboost)** is selected for analysis")
                bar.progress(100)
                st.stop()
            else:
                # Violin Feature Importance --------------------------------------------------------------
                st.subheader(
                    f"Success Drivers - All {selected_genre} Playlists")
                st.pyplot(shap.summary_plot(state.shap_values, state.X_train))
                bar.progress(50)

                # Dependence plots for each of the top 3 features ----------------------------------------
                st.header(f"Shapley Dependence for {selected_genre} Playlists")
                vals = np.abs(state.shap_values).mean(0)
                feature_importance = pd.DataFrame(
                    list(zip(state.X_train.columns, vals)),
                    columns=["col_name", "feature_importance_vals"],
                )
                feature_importance = (feature_importance.sort_values(
                    by=["feature_importance_vals"],
                    ascending=False).reset_index(drop=True).head(3))
                top_features = list(feature_importance["col_name"])

                for feature in top_features:
                    index = list(state.X_train.columns).index(feature)
                    st.subheader(f"Shapley Value Dependence for {feature}")
                    st.pyplot(
                        shap.dependence_plot(
                            index,
                            state.shap_values,
                            state.X_train,
                            alpha=0.5,
                            interaction_index=None,
                        ))
                bar.progress(70)

                # Individual importance -------------------------------------------------------------------
                st.header(
                    f"Explaining {selected_genre} Playlist Success Prediction")

                # Display the data frame for users to visually see the row they want to analyze
                st.subheader("Model Training Data")
                st.dataframe(state.view)

                state.new_row = int(
                    st.number_input(
                        "Row from dataframe to inspect",
                        min_value=0,
                        max_value=len(state.view),
                        value=10,
                    ))
                if state.row != state.new_row:
                    state.row = state.new_row
                    shap_object = ShapObject(
                        base_values=state.explainer.expected_value,
                        values=state.explainer.shap_values(
                            state.X_train)[state.row, :],
                        feature_names=state.X_train.columns,
                        data=state.X_train.iloc[state.row, :],
                    )
                    bar.progress(85)

                    st.subheader(
                        f"Feature Contributions to {selected_genre} Playlist #{state.row}"
                    )
                    st.pyplot(shap.waterfall_plot(shap_object))
                    bar.progress(100)
                    st.stop()
                else:
                    st.stop()
        else:
            st.stop()
    else:
        st.stop()
def show_plot(df, datefield, title, wdw, what_to_show_, graph_type, centersmooth):
    what_to_show_ = what_to_show_ if type(what_to_show_) == list else [what_to_show_]
    color_list = [
        "#02A6A8",
        "#4E9148",
        "#F05225",
        "#024754",
        "#FBAA27",
        "#302823",
        "#F07826",
        "#ff6666",
    ]
    if len(df) == 1 and datefield == "YYYY":
        st.warning("Selecteer een grotere tijdsperiode")
        st.stop()

    if graph_type == "pyplot":
        with _lock:
            fig1x = plt.figure()
            ax = fig1x.add_subplot(111)
            for i, what_to_show in enumerate(what_to_show_):
                sma = df[what_to_show].rolling(window=wdw, center=centersmooth).mean()
                ax = df[what_to_show].plot(
                    label="_nolegend_",
                    linestyle="dotted",
                    color=color_list[i],
                    linewidth=0.5,
                )
                ax = sma.plot(label=what_to_show, color=color_list[i], linewidth=0.75)
            ax.set_xticks(df[datefield].index)
            if datefield == "YYYY":
                ax.set_xticklabels(df[datefield], fontsize=6, rotation=90)
            else:
                ax.set_xticklabels(df[datefield].dt.date, fontsize=6, rotation=90)
            xticks = ax.xaxis.get_major_ticks()
            for i, tick in enumerate(xticks):
                if i % 10 != 0:
                    tick.label1.set_visible(False)
            plt.xticks()
            plt.grid(which="major", axis="y")
            plt.title(title)
            plt.legend()
            st.pyplot(fig1x)
    else:
        fig = go.Figure()
        df["sma"] = df[what_to_show_[0]].rolling(window=wdw, center=centersmooth).mean()
        sma = go.Scatter(
            name=what_to_show_[0],
            x=df[datefield],
            y=df["sma"],
            mode='lines',
            line=dict(width=1, color='rgba(0, 0, 168, 0.8)'),
        )
        points = go.Scatter(
            name="",
            x=df[datefield],
            y=df[what_to_show_[0]],
            mode='markers',
            showlegend=False,
            marker=dict(color='LightSkyBlue', size=2))
        data = [sma, points]
        layout = go.Layout(
            yaxis=dict(title=what_to_show_[0]),
            title=title,
        )  # , xaxis=dict(tickformat="%d-%m")
        fig = go.Figure(data=data, layout=layout)
        fig.update_layout(xaxis=dict(tickformat="%d-%m"))
        st.plotly_chart(fig, use_container_width=True)
def cs_body():
    # Magic commands
    col1, col2, col3 = st.columns(3)

    col1.subheader('Magic commands')
    col1.code('''# Magic commands implicitly `st.write()`
\'\'\' _This_ is some __Markdown__ \'\'\'
a=3
'dataframe:', data
''')

    # Display text
    col1.subheader('Display text')
    col1.code('''
st.text('Fixed width text')
st.markdown('_Markdown_') # see *
st.caption('Balloons. Hundreds of them...')
st.latex(r\'\'\' e^{i\pi} + 1 = 0 \'\'\')
st.write('Most objects') # df, err, func, keras!
st.write(['st', 'is <', 3]) # see *
st.title('My title')
st.header('My header')
st.subheader('My sub')
st.code('for i in range(8): foo()')
* optional kwarg unsafe_allow_html = True
''')

    # Display data
    col1.subheader('Display data')
    col1.code('''
st.dataframe(my_dataframe)
st.table(data.iloc[0:10])
st.json({'foo':'bar','fu':'ba'})
st.metric(label="Temp", value="273 K", delta="1.2 K")
''')

    # Display charts
    col1.subheader('Display charts')
    col1.code('''
st.line_chart(data)
st.area_chart(data)
st.bar_chart(data)
st.pyplot(fig)
st.altair_chart(data)
st.vega_lite_chart(data)
st.plotly_chart(data)
st.bokeh_chart(data)
st.pydeck_chart(data)
st.deck_gl_chart(data)
st.graphviz_chart(data)
st.map(data)
''')

    # Display media
    col1.subheader('Display media')
    col1.code('''
st.image('./header.png')
st.audio(data)
st.video(data)
''')

    # Display interactive widgets
    col2.subheader('Display interactive widgets')
    col2.code('''
st.button('Hit me')
st.download_button('On the dl', data)
st.checkbox('Check me out')
st.radio('Radio', [1,2,3])
st.selectbox('Select', [1,2,3])
st.multiselect('Multiselect', [1,2,3])
st.slider('Slide me', min_value=0, max_value=10)
st.select_slider('Slide to select', options=[1,'2'])
st.text_input('Enter some text')
st.number_input('Enter a number')
st.text_area('Area for textual entry')
st.date_input('Date input')
st.time_input('Time entry')
st.file_uploader('File uploader')
st.color_picker('Pick a color')
''')
    col2.write('Use widgets\' returned values in variables:')
    col2.code('''
>>> for i in range(int(st.number_input('Num:'))): foo()
>>> if st.sidebar.selectbox('I:',['f']) == 'f': b()
>>> my_slider_val = st.slider('Quinn Mallory', 1, 88)
>>> st.write(slider_val)
''')

    # Control flow
    col2.subheader('Control flow')
    col2.code('''
st.stop()
''')

    # Lay out your app
    col2.subheader('Lay out your app')
    col2.code('''
st.form('my_form_identifier')
st.form_submit_button('Submit to me')
st.container()
st.columns(spec)
>>> col1, col2 = st.columns(2)
>>> col1.subheader('Columnisation')
st.expander('Expander')
>>> with st.expander('Expand'):
>>>     st.write('Juicy deets')
''')

    col2.write('Batch widgets together in a form:')
    col2.code('''
>>> with st.form(key='my_form'):
>>>     text_input = st.text_input(label='Enter some text')
>>>     submit_button = st.form_submit_button(label='Submit')
''')

    # Display code
    col2.subheader('Display code')
    col2.code('''
st.echo()
>>> with st.echo():
>>>     st.write('Code will be executed and printed')
''')

    # Display progress and status
    col3.subheader('Display progress and status')
    col3.code('''
st.progress(progress_variable_1_to_100)
st.spinner()
>>> with st.spinner(text='In progress'):
>>>     time.sleep(5)
>>>     st.success('Done')
st.balloons()
st.error('Error message')
st.warning('Warning message')
st.info('Info message')
st.success('Success message')
st.exception(e)
''')

    # Placeholders, help, and options
    col3.subheader('Placeholders, help, and options')
    col3.code('''
st.empty()
>>> my_placeholder = st.empty()
>>> my_placeholder.text('Replaced!')
st.help(pandas.DataFrame)
st.get_option(key)
st.set_option(key, value)
st.set_page_config(layout='wide')
''')

    # Mutate data
    col3.subheader('Mutate data')
    col3.code('''
DeltaGenerator.add_rows(data)
>>> my_table = st.table(df1)
>>> my_table.add_rows(df2)
>>> my_chart = st.line_chart(df1)
>>> my_chart.add_rows(df2)
''')

    # Optimize performance
    col3.subheader('Optimize performance')
    col3.code('''
@st.cache
>>> @st.cache
... def fetch_and_clean_data(url):
...     # Mutate data at url
...     return data
>>> # Executes d1 as first time
>>> d1 = fetch_and_clean_data(ref1)
>>> # Does not execute d1; returns cached value, d1==d2
>>> d2 = fetch_and_clean_data(ref1)
>>> # Different arg, so function d1 executes
>>> d3 = fetch_and_clean_data(ref2)
''')

    col3.subheader('Other key parts of the API')
    col3.markdown('''
<small>[State API](https://docs.streamlit.io/en/stable/session_state_api.html)</small><br>
<small>[Theme option reference](https://docs.streamlit.io/en/stable/theme_options.html)</small><br>
<small>[Components API reference](https://docs.streamlit.io/en/stable/develop_streamlit_components.html)</small><br>
<small>[API cheat sheet](https://share.streamlit.io/daniellewisdl/streamlit-cheat-sheet/app.py)</small><br>
''', unsafe_allow_html=True)

    return None
def page_work(state_container, page_flip: bool):
    '''The main workhorse routine for the Xplore page'''

    if not state_container.xploreSessionState:
        state_container.xploreSessionState = XploreSessionState()
        state = state_container.xploreSessionState
        state.columns = ['default']
    else:
        state = state_container.xploreSessionState

    url_params = st.experimental_get_query_params()
    page = url_params.pop('page', '')
    if get_title() in page:
        if url_params and not all(not x for x in url_params.values()):
            for key in url_params:
                if key == 'columns':
                    # This needs to be a list
                    continue
                val = url_params.get(key, '')
                if isinstance(val, list):
                    val = val[0]
                    url_params[key] = val
                if key == '':
                    if val == 'True':
                        url_params[key] = True
                    else:
                        url_params[key] = False
            state.__init__(**url_params)

    sqobjs = state_container.sqobjs
    # All the user input is preserved in the state vars
    xplore_sidebar(state, sqobjs)

    if state.table != "tables":
        df = gui_get_df(sqobjs[state.table],
                        _table=state.table,
                        namespace=state.namespace.split(),
                        hostname=state.hostname.split(),
                        start_time=state.start_time,
                        end_time=state.end_time,
                        view=state.view,
                        columns=state.columns)
        if state.table == "device" and 'uptime' in df.columns:
            df.drop(columns=['uptime'], inplace=True)
    else:
        df = gui_get_df(sqobjs[state.table],
                        _table=state.table,
                        namespace=state.namespace.split(),
                        hostname=state.hostname.split(),
                        start_time=state.start_time,
                        end_time=state.end_time,
                        view=state.view)

    query_str = ''
    if not df.empty:
        if 'error' in df.columns:
            st.error(df.iloc[0].error)
            st.experimental_set_query_params(**asdict(state))
            st.stop()
        if state.query:
            try:
                show_df = df.query(state.query)
                query_str = state.query
            except Exception:
                st.warning('Query string throws an exception, ignoring')
                show_df = df
                query_str = ''
        else:
            show_df = df
    else:
        show_df = df

    if state.table != "tables":
        summ_df = xplore_run_summarize(sqobjs[state.table],
                                       namespace=state.namespace.split(),
                                       hostname=state.hostname.split(),
                                       start_time=state.start_time,
                                       end_time=state.end_time,
                                       query_str=query_str)
    else:
        summ_df = pd.DataFrame()

    if not show_df.empty:
        dfcols = show_df.columns.tolist()
        if (state.table == 'routes' and 'prefix' in dfcols and
                'prefixlen' not in dfcols):
            dfcols.append('prefixlen')

        dfcols = sorted((filter(lambda x: x not in ['index', 'sqvers'],
                                dfcols)))

        grid1 = st.beta_container()
        headercol, uniq_col = st.beta_columns(2)
        with grid1:
            with headercol:
                st.write(
                    f'<h2 style="color: darkblue; font-weight: bold;">{state.table} View</h2>',
                    unsafe_allow_html=True)
                if show_df.shape[0] > 256:
                    st.write(
                        f'Showing first 256 of {show_df.shape[0]} rows, use query to filter'
                    )
            with uniq_col:
                if state.table != "tables":
                    if (not state.uniq_clicked or
                            state.uniq_clicked not in dfcols):
                        if 'hostname' in dfcols:
                            selindex = dfcols.index('hostname') + 1
                        else:
                            selindex = 1
                    elif state.uniq_clicked in dfcols:
                        selindex = dfcols.index(state.uniq_clicked) + 1

                    state.uniq_clicked = st.selectbox('Distribution Count of',
                                                      options=['-'] + dfcols,
                                                      index=selindex,
                                                      key='distcount')

        scol1, scol2 = st.beta_columns(2)

        if state.table != "tables" and state.uniq_clicked != '-':
            uniq_df = xplore_run_unique(show_df, columns=state.uniq_clicked)
        else:
            uniq_df = pd.DataFrame()

        if state.assert_clicked:
            assert_df = xplore_run_assert(sqobjs[state.table],
                                          start_time=state.start_time,
                                          end_time=state.end_time,
                                          namespace=state.namespace.split())
        else:
            assert_df = pd.DataFrame()

        if not summ_df.empty:
            with scol1:
                st.subheader('Summary Information')
                st.dataframe(data=summ_df)

        if not uniq_df.empty:
            with scol2:
                if uniq_df.shape[0] > 32:
                    st.warning(
                        f'{state.uniq_clicked} has cardinality > 32. Displaying top 32'
                    )
                    chart = alt.Chart(
                        uniq_df.head(32),
                        title=f'{state.uniq_clicked} Distribution') \
                        .mark_bar(color='purple', tooltip=True) \
                        .encode(y=alt.Y(f'{state.uniq_clicked}:N', sort='-x'),
                                x='count')
                else:
                    chart = alt.Chart(
                        uniq_df,
                        title=f'{state.uniq_clicked} Distribution') \
                        .mark_bar(color='purple', tooltip=True) \
                        .encode(y=alt.Y(f'{state.uniq_clicked}:N', sort='-x'),
                                x='count')
                st.altair_chart(chart)

        if state.table in ['interfaces', 'ospf', 'bgp', 'evpnVni']:
            if assert_df.empty:
                expand_assert = False
            else:
                expand_assert = True
            validate_expander = st.beta_expander('Assert',
                                                 expanded=expand_assert)
            with validate_expander:
                if not assert_df.empty:
                    st.dataframe(data=assert_df)
                elif state.assert_clicked:
                    st.write('Assert passed')
                else:
                    st.write('Assert not run')

    expander = st.beta_expander('Table', expanded=True)
    with expander:
        if not show_df.empty:
            convert_dict = {
                x: 'str'
                for x in df.select_dtypes('category').columns
            }
            st.dataframe(data=sq_gui_style(show_df.head(256)
                                           .astype(convert_dict),
                                           state.table),
                         height=600, width=2500)
        else:
            st.warning('No Data from query')

    st.experimental_set_query_params(**asdict(state))
def xplore_sidebar(state, sqobjs: dict):
    '''Draw appropriate sidebar for the page'''

    stime = state.start_time
    etime = state.end_time

    table_vals = sorted(list(sqobjs.keys()))

    if state.table:
        if isinstance(state.table, list):
            tblidx = table_vals.index(state.table[0])
        else:
            tblidx = table_vals.index(state.table)
    else:
        tblidx = table_vals.index('device')  # Default starting table
    assert_val = state.assert_clicked
    view_idx = 1 if state.view == 'all' else 0

    devdf = gui_get_df(sqobjs['device'], columns=['namespace', 'hostname'])
    if devdf.empty:
        st.error('Unable to retrieve any namespace info')
        st.stop()

    namespaces = [""]
    namespaces.extend(sorted(devdf.namespace.unique().tolist()))
    if state.namespace:
        nsidx = namespaces.index(state.namespace)
    else:
        nsidx = 0
    namespace = st.sidebar.selectbox('Namespace', namespaces, index=nsidx)

    if namespace != state.namespace:
        state.hostname = None
        state.namespace = namespace

    hostnames = [""]
    if state.namespace:
        hostlist = devdf.query(f'namespace=="{state.namespace}"') \
                        .hostname.unique().tolist()
    else:
        hostlist = devdf.hostname.unique().tolist()
    hostnames.extend(sorted(hostlist))
    if state.hostname:
        hostidx = hostnames.index(state.hostname)
    else:
        hostidx = 0
    state.hostname = st.sidebar.selectbox('Hostname', hostnames,
                                          index=hostidx)

    state.start_time = st.sidebar.text_input('Start time',
                                             value=stime,
                                             key='stime')
    state.end_time = st.sidebar.text_input('End time',
                                           value=etime,
                                           key='etime')
    table = st.sidebar.selectbox('Select Table to View',
                                 tuple(table_vals), index=tblidx)

    if table != state.table:
        # We need to reset the specific variables
        state.query = ''
        state.assert_clicked = False
        state.uniq_clicked = 0
        state.table = table
        state.columns = 'default'

    view_vals = ('latest', 'all')
    if state.start_time and state.end_time:
        # We show everything thats happened when both times are specified
        view_idx = 1
    state.view = st.sidebar.radio("View of Data", view_vals,
                                  index=view_idx)
    fields = TablesObj().describe(table=state.table)
    if state.table != 'tables':
        colist = sorted((filter(lambda x: x not in ['index', 'sqvers'],
                                fields.name.tolist())))
        columns = st.sidebar.multiselect('Pick columns',
                                         ['default', 'all'] + colist,
                                         default=state.columns)
        if ('default' in columns or 'all' in columns) and len(columns) == 1:
            col_sel_val = True
        else:
            col_sel_val = False

        col_ok = st.sidebar.checkbox('Column Selection Done',
                                     value=col_sel_val)
        if not col_ok:
            columns = ['default']
    else:
        col_ok = True
        columns = ['default']

    if not columns:
        columns = ['default']

    state.columns = columns
    if state.table in ['interfaces', 'ospf', 'bgp', 'evpnVni']:
        state.assert_clicked = st.sidebar.checkbox('Run Assert',
                                                   value=assert_val)
    else:
        state.assert_clicked = False

    if not col_ok:
        st.experimental_set_query_params(**asdict(state))
        st.stop()

    if ('default' in columns or 'all' in columns) and len(columns) != 1:
        st.error('Cannot select default/all with any other columns')
        st.experimental_set_query_params(**asdict(state))
        st.stop()
    elif not columns:
        st.error('Columns cannot be empty')
        st.experimental_set_query_params(**asdict(state))
        st.stop()

    state.query = st.sidebar.text_input(
        'Filter results with pandas query', value=state.query,
        key=state.table)
    st.sidebar.markdown(
        "[query syntax help](https://suzieq.readthedocs.io/en/latest/pandas-query-examples/)")

    if columns == ['all']:
        columns = ['*']

    if state.table != "tables":
        col_expander = st.sidebar.beta_expander('Column Names',
                                                expanded=False)
        with col_expander:
            st.subheader(f'{state.table} column names')
            st.table(TablesObj().describe(table=state.table)
                     .query('name != "sqvers"')
                     .reset_index(drop=True).style)
def checkpoint():
    prep_data = st.button('Prepare data')
    if not prep_data:
        st.warning('Please prepare the data before proceeding.')
        st.stop()
    st.success('Thank you for preparing the data')
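# A hedged sketch of how a gate like checkpoint() might be used in a page script.
# load_and_clean() is hypothetical and only illustrates that work below the gate
# runs solely after the 'Prepare data' button has been pressed on the current rerun.
def gated_page():
    checkpoint()           # halts via st.stop() until the button is clicked
    df = load_and_clean()  # hypothetical data-preparation step
    st.dataframe(df)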
def graph_day(df, what_to_show_l, title):
    """ _ _ _ """
    # st.write(f"t = {t}")
    df_temp = pd.DataFrame(columns=["date"])
    if what_to_show_l is None:
        st.warning("Choose something")
        st.stop()

    if type(what_to_show_l) == list:
        what_to_show_l_ = what_to_show_l
    else:
        what_to_show_l_ = [what_to_show_l]

    aantal = len(what_to_show_l_)
    # SHOW A GRAPH IN TIME / DAY
    with _lock:
        fig1x = plt.figure()
        ax = fig1x.add_subplot(111)

        color_list = [
            "#02A6A8",
            "#4E9148",
            "#F05225",
        ]

        n = 0  # counter to walk through the colors-list
        for b in what_to_show_l_:
            df_temp = df
            df_temp[b].plot(
                label="_nolegend_",
                color=color_list[n],
                linestyle="--",
                alpha=0.9,
                linewidth=0.8,
            )
            n += 1
        plt.title(title, fontsize=10)

        # show every 10th date on x axis
        a__ = (max(df_temp["date"].tolist())).date() - (min(df_temp["date"].tolist())).date()
        freq = int(a__.days / 10)
        ax.xaxis.set_major_locator(MultipleLocator(freq))
        ax.set_xticks(df_temp["date"].index)
        ax.set_xticklabels(df_temp["date"].dt.date, fontsize=6, rotation=90)
        xticks = ax.xaxis.get_major_ticks()
        # for i, tick in enumerate(xticks):
        #     if i % 10 != 0:
        #         tick.label1.set_visible(False)
        plt.xticks()

        # layout of the x-axis
        ax.xaxis.grid(True, which="major", alpha=0.4, linestyle="--")
        ax.yaxis.grid(True, which="major", alpha=0.4, linestyle="--")

        left, right = ax.get_xlim()
        ax.set_xlim(left, right)
        fontP = FontProperties()
        fontP.set_size("xx-small")

        plt.xlabel("date")

        # everything in legend
        # https://stackoverflow.com/questions/33611803/pyplot-single-legend-when-plotting-on-secondary-y-axis
        handles, labels = [], []
        for ax in fig1x.axes:
            for h, l in zip(*ax.get_legend_handles_labels()):
                handles.append(h)
                labels.append(l)
        # plt.legend(handles,labels)
        # https://stackoverflow.com/questions/4700614/how-to-put-the-legend-out-of-the-plot/43439132#43439132
        plt.legend(handles, labels, bbox_to_anchor=(0, -0.5), loc="lower left", ncol=2)
        ax.text(
            1,
            1.1,
            "Created by Rene Smit — @rcsmit",
            transform=ax.transAxes,
            fontsize="xx-small",
            va="top",
            ha="right",
        )
        st.pyplot(fig1x)
def error(msg):
    global ERROR
    ERROR.markdown(
        f"<p style='font-size:18px'><span style='color:{DFT.RED}'><b>{msg}</b></span></p>",
        unsafe_allow_html=True)
    st.stop()
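# For context, a sketch of the setup this helper assumes (the global ERROR placeholder
# and DFT.RED are defined elsewhere in that app; the condition below is illustrative):
# the placeholder is typically reserved near the top of the page with st.empty(), so the
# message renders in a fixed spot before st.stop() halts the script.
ERROR = st.empty()

bad_input = True  # illustrative condition only
if bad_input:
    error("Input file could not be parsed")  # writes into ERROR, then stops the script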
def app():
    """ Covid world webgui """
    options = [
        "New cases",
        "New deaths",
        "Total cases",
        "Total deaths",
        "Hosp patients per mill",
        "Positivity rate",
    ]
    plot_selected = st.sidebar.selectbox("Select a plot", options, index=0)
    date_selected = st.sidebar.date_input("Change the dates?",
                                          value=(dt.datetime(2020, 3, 1),
                                                 dt.datetime.now()))
    if len(date_selected) != 2:
        st.info("Select a beginning and end date")
        st.stop()

    ##### Retrieve #####
    columns = [
        "location",
        "continent",
        "date",
        "hosp_patients_per_million",
        "new_cases_smoothed_per_million",
        "new_deaths_smoothed_per_million",
        "total_cases_per_million",
        "total_deaths_per_million",
        "rolling_pos_per_tests",
    ]
    my_df = pd.DataFrame(h.sql_orm_requester(columns, table, session))
    my_df.columns = columns
    my_df["date"] = pd.to_datetime(my_df["date"])

    # st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)
    region_options = ["Default", "Continents", "World"]
    regions = [
        "North America", "West Europe", "East Europe", "Nordics", "Asia"
    ]
    regions.sort()
    region_options = region_options + regions
    region = st.sidebar.radio("Preset locations", options=region_options, index=0)

    if region == "North America":
        default = ["Canada", "United States", "Mexico"]
    elif region == "Nordics":
        default = ["Sweden", "Finland", "Norway"]
    elif region == "West Europe":
        default = [
            "Austria",
            "Belgium",
            "France",
            "Germany",
            "Spain",
            "United Kingdom",
            "Portugal",
            "Netherlands",
            "Switzerland",
        ]
    elif region == "East Europe":
        default = [
            "Hungary",
            "Slovakia",
            "Austria",
            "Slovenia",
            "Croatia",
            "Serbia",
            "Romania",
            "Ukraine",
        ]
    elif region == "Continents":
        default = [
            "Europe",
            "North America",
            "South America",
            "Africa",
            "Asia",
            "Australia",
        ]
    elif region == "Asia":
        default = ["Japan", "South Korea", "Thailand", "India"]
        st.sidebar.info(
            "__Note__: China not included by default due to low reported numbers"
        )
        china_wanted = st.sidebar.checkbox("Add China")
        if china_wanted:
            default.append("China")
    elif region == "World":
        default = list(my_df['location'].unique())
    else:
        default = ["Canada", "Hungary", "United States"]
    default.sort()

    premade_df = h.dataset_filterer(my_df, "location", default_selected=default)

    plot_selected = plot_selected.lower()
    if "positivity rate" in plot_selected:
        # cant do log of this data, so checkbox not needed
        st.info(
            "W.H.O. guidelines recommend a positivity rate of at most __0.05__ for two weeks before nations reopen."
        )
        ylabel = "rolling_pos_per_tests"
        title = "Positivity rate by location"
        graph_caller(ylabel,
                     date_selected,
                     premade_df,
                     title,
                     ylog=False,
                     yrange=(0, 0.5))
        st.write(
            "The positivity rate is calculated as 'number of positive tests' / 'positive + negative tests'"
        )
    else:
        placeholder = st.empty()
        placeholder.info(
            "__Instructions:__ Move mouse into plot to interact. Drag and select to zoom. Double click to reset. Click the camera to save."
        )
        col_annot, col_ylog = st.columns(2)
        with col_annot:
            show_annot = st.checkbox("Show annotations", value=True)
        with col_ylog:
            ylog = st.checkbox("log(y axis)")
        if ylog:
            placeholder.info(
                "The log of these values indicates the speed of transmission, making the flattening of curves more apparent."
            )

        if "new deaths" in plot_selected:
            ylabel = "new_deaths_smoothed_per_million"
            title = "New deaths per million by location"
        elif "new cases" in plot_selected:
            ylabel = "new_cases_smoothed_per_million"
            title = "New cases per million by location"
        elif "total cases" in plot_selected:
            ylabel = "total_cases_per_million"
            title = "Total cases per million by location"
        elif "total deaths" in plot_selected:
            ylabel = "total_deaths_per_million"
            title = "Total deaths per million by location"
        elif "hosp patients per mill" in plot_selected:
            ylabel = "hosp_patients_per_million"
            title = "Hospital patients per million by location"
            placeholder.warning(
                "The graph may be blank as not all countries publish hospital data"
            )

        graph_caller(ylabel,
                     date_selected,
                     premade_df,
                     title,
                     ylog=ylog,
                     show_annot=show_annot)

    # leaderboard
    yesterday = dt.datetime.now() - dt.timedelta(days=2)
    fig = h.overview_plotter(yesterday.date(),
                             premade_df,
                             x='location',
                             y=ylabel,
                             sortby='continent',
                             title=f"Top {title.lower()} in the past week")
    st.plotly_chart(fig)
def ttest_upload_data_ui():
    '''The Two-sample Student's t-test - Continuous variables (upload data) section. '''

    # Render the header.
    with st.beta_container():
        st.title('Two-sample Student\'s t-test')
        st.header('Continuous variables')

    # Render file dropbox
    with st.beta_expander('Upload data', expanded=True):
        how_to_load = st.selectbox('How to access raw data? ',
                                   ('Upload', 'URL', 'Sample data'))
        if how_to_load == 'Upload':
            uploaded_file = st.file_uploader("Choose a CSV file", type='.csv')
        elif how_to_load == 'URL':
            uploaded_file = st.text_input('File URL: ')
            if uploaded_file == '':
                uploaded_file = None
        elif how_to_load == 'Sample data':
            uploaded_file = 'https://raw.githubusercontent.com/luxin-tian/mosco_ab_test/main/sample_data/cookie_cats.csv'
        if uploaded_file is not None:
            with st.spinner('Loading data...'):
                df = _load_data(uploaded_file)

    if uploaded_file is not None:
        with st.beta_expander('Data preview', expanded=True):
            with st.spinner('Loading data...'):
                st.dataframe(df)
                st.write('`{}` rows, `{}` columns'.format(df.shape[0], df.shape[1]))

    if uploaded_file is not None:
        with st.beta_expander('Configurations', expanded=True):
            df_columns_types = [ind + ' (' + val.name + ')' for ind, val in df.dtypes.iteritems()]
            df_columns_dict = {(ind + ' (' + val.name + ')'): ind for ind, val in df.dtypes.iteritems()}
            var_group_label = df_columns_dict[st.selectbox('Group label', df_columns_types)]
            col1, col2 = st.beta_columns(2)
            with col1:
                var_group_name_1 = st.selectbox('Group name A', df[var_group_label].unique())
            with col2:
                var_group_name_2 = st.selectbox('Group name B', df[var_group_label].unique())
            var_outcome = [df_columns_dict[var] for var in st.multiselect('Outcome variable: ', df_columns_types)]
            col1, col2 = st.beta_columns([1, 1])
            with col1:
                conf_level = st.select_slider('Confidence level: ', ('0.90', '0.95', '0.99'))
            with col2:
                hypo_type = st.radio('Hypothesis type: ', ('One-sided', 'Two-sided'))
            if_dropna = st.checkbox('Drop null values', value=True)
            if_remove_outliers = st.checkbox('Remove outliers', value=False)
            if if_remove_outliers:
                outlier_lower_qtl, outlier_upper_qtl = st.slider(
                    'Quantiles (observations falling into the tails will be removed): ',
                    min_value=0.0, max_value=1.0, step=0.01, value=(0.0, 0.95))
                # col1, col2 = st.beta_columns(2)
                # with col1:
                #     outlier_lower_qtl = st.slider('Lower quantile: ', min_value=0.0, max_value=0.25, step=0.01, value=0.0)
                # with col2:
                #     outlier_upper_qtl = st.slider('Upper quantile: ', min_value=0.75, max_value=1.00, step=0.01, value=0.99)
            else:
                outlier_lower_qtl, outlier_upper_qtl = None, None
            if_data_description = st.checkbox('Show descriptive statistics', value=False)
            if_apply = st.button('Confirm')

    if uploaded_file is not None:
        if if_apply:
            if var_group_name_1 == var_group_name_2:
                st.error('The names of Group A and Group B cannot be identical. ')
                st.stop()
            for col in var_outcome:
                df = _process_data(df=df, col=col, if_dropna=if_dropna,
                                   if_remove_outliers=if_remove_outliers,
                                   outlier_lower_qtl=outlier_lower_qtl,
                                   outlier_upper_qtl=outlier_upper_qtl)

            # Render hypothesis testing
            with st.beta_expander('Hypothesis testing', expanded=True):
                with st.spinner('Calculating...'):
                    df_group_1 = df[df[var_group_label] == var_group_name_1]
                    df_group_2 = df[df[var_group_label] == var_group_name_2]
                    for var in var_outcome:
                        st.markdown(f'`{var}`: {df[var].dtype}')
                        mu_1 = np.mean(df_group_1[var])
                        mu_2 = np.mean(df_group_2[var])
                        sigma_1 = np.std(df_group_1[var], ddof=1)
                        sigma_2 = np.std(df_group_2[var], ddof=1)
                        n_1 = len(df_group_1[var])
                        n_2 = len(df_group_2[var])
                        tstat, p_value, tstat_denom, pooled_sd, effect_size = scipy_ttest_ind_from_stats(
                            mu_1, mu_2, sigma_1, sigma_2, n_1, n_2)
                        observed_power = sm_tt_ind_solve_power(
                            effect_size=effect_size, n1=n_1, n2=n_2,
                            alpha=1 - float(conf_level), power=None,
                            hypo_type=hypo_type, if_plot=False)
                        # Render the results
                        ttest_plot(mu_1, mu_2, sigma_1, sigma_2, conf_level, tstat,
                                   p_value, tstat_denom, hypo_type, observed_power)

            # Render descriptive statistics
            if if_data_description:
                with st.beta_expander('Data descriptions', expanded=True):
                    with st.spinner('Processing data...'):
                        # if if_factorize:
                        #     df[var_hot_encoding] = df[var_hot_encoding].astype('category')
                        df = df[(df[var_group_label] == var_group_name_1) |
                                (df[var_group_label] == var_group_name_2)]
                        df_summary = df.groupby(by=var_group_label).describe(include='all')

                        # Plot distribution
                        for var in var_outcome:
                            st.markdown(f'`{var}`: {df[var].dtype}')
                            st.table(df_summary[var].T.dropna())
                            fig_1 = sns.displot(data=df, x=var, col=var_group_label, kde=True)
                            fig_2 = sns.displot(data=df, kind="ecdf", x=var, hue=var_group_label, rug=True)
                            fig_3, ax = plt.subplots()
                            ax = sns.boxplot(data=df, y=var, hue=var_group_label)
                            st.pyplot(fig_1)
                            col1, col2 = st.beta_columns([1, 1.1])
                            with col1:
                                st.pyplot(fig_2)
                            with col2:
                                st.pyplot(fig_3)
    return
def plot_percentiles(df, gekozen_weerstation, what_to_show, wdw, centersmooth):
    if len(what_to_show) != 1:
        st.warning("Choose (only) 1 thing to show")
        st.stop()
    df_quantile = pd.DataFrame(
        {"date": [], "q10": [], "q25": [], "q50": [], "avg": [], "q75": [], "q90": []}
    )
    year_to_show = st.sidebar.number_input("Year to show (2100 for nothing)", 1900, 2100, 2021)
    (month_from, month_until) = st.sidebar.slider("Months (from/until (incl.))", 1, 12, (1, 12))
    if month_from > month_until:
        st.warning("Make sure that the end month is not before the start month")
        st.stop()
    df = df[
        (df["YYYYMMDD"].dt.month >= month_from) & (df["YYYYMMDD"].dt.month <= month_until)
    ]

    for month in list(range(1, 13)):
        for day in list(range(1, 32)):
            if month == 2 and day == 29:
                pass
            else:
                df_ = df[
                    (df["YYYYMMDD"].dt.month == month) & (df["YYYYMMDD"].dt.day == day)
                ]
                df__ = df[
                    (df["YYYYMMDD"].dt.year == year_to_show)
                    & (df["YYYYMMDD"].dt.month == month)
                    & (df["YYYYMMDD"].dt.day == day)
                ]
                if len(df__) > 0:
                    value_in_year_ = df__[what_to_show].iloc[0]
                    value_in_year = value_in_year_[0]
                else:
                    value_in_year = None
                if len(df_) > 0:
                    data = df_[what_to_show]  # .tolist()
                    # st.write(data)
                    date_ = "1900-" + str(month).zfill(2) + '-' + str(day).zfill(2)
                    q10 = np.percentile(data, 10)
                    q25 = np.percentile(data, 25)
                    q50 = np.percentile(data, 50)
                    q75 = np.percentile(data, 75)
                    q90 = np.percentile(data, 90)
                    avg = data.mean()
                    df_quantile = df_quantile.append(
                        {
                            "date_": date_,
                            "q10": q10,
                            "q25": q25,
                            "q50": q50,
                            "avg": avg,
                            "q75": q75,
                            "q90": q90,
                            "value_in_year": value_in_year,
                        },
                        ignore_index=True,
                    )
    df_quantile['date'] = pd.to_datetime(df_quantile.date_, format='%Y-%m-%d', errors='coerce')

    columns = ["q10", "q25", "avg", "q50", "q75", "q90", "value_in_year"]
    for c in columns:
        df_quantile[c] = df_quantile[c].rolling(window=wdw, center=centersmooth).mean()
        df_quantile[c] = round(df_quantile[c], 1)

    colors = ["red", "blue", ["yellow"]]
    title = (f" {what_to_show[0]} in {gekozen_weerstation} (percentiles (10/25/avg/75/90/))")
    graph_type = "plotly"

    if graph_type == "pyplot":
        with _lock:
            fig1x = plt.figure()
            ax = fig1x.add_subplot(111)
            idx = 0
            df_quantile.plot(x='date', y='avg', ax=ax, linewidth=0.75,
                             color=colors[idx], label="avg")
            # df_quantile.plot(x='date', y='q50', ax=ax, linewidth=0.75,
            #                  color="yellow",
            #                  label="mediaan", alpha=0.75)
            df_quantile.plot(x='date', y='value_in_year', ax=ax, color="black",
                             linewidth=0.75, label=f"value in {year_to_show}")
            ax.fill_between(df_quantile['date'],
                            y1=df_quantile['q25'],
                            y2=df_quantile['q75'],
                            alpha=0.30, facecolor=colors[idx])
            ax.fill_between(df_quantile['date'],
                            y1=df_quantile['q10'],
                            y2=df_quantile['q90'],
                            alpha=0.15, facecolor=colors[idx])
            ax.set_xticks(df_quantile["date"].index)
            # if datefield == "YYYY":
            #     ax.set_xticklabels(df[datefield], fontsize=6, rotation=90)
            # else:
            ax.set_xticklabels(df_quantile["date"], fontsize=6, rotation=90)
            xticks = ax.xaxis.get_major_ticks()
            for i, tick in enumerate(xticks):
                if i % 10 != 0:
                    tick.label1.set_visible(False)
            # plt.xticks()
            plt.grid(which="major", axis="y")
            plt.title(title)
            plt.legend()
            st.pyplot(fig1x)
    else:
        fig = go.Figure()
        q10 = go.Scatter(
            name='q10',
            x=df_quantile["date"],
            y=df_quantile['q10'],
            mode='lines',
            line=dict(width=0.5, color="rgba(255, 188, 0, 0.5)"),
            fillcolor='rgba(68, 68, 68, 0.1)',
            fill='tonexty')
        q25 = go.Scatter(
            name='q25',
            x=df_quantile["date"],
            y=df_quantile['q25'],
            mode='lines',
            line=dict(width=0.5, color="rgba(255, 188, 0, 0.5)"),
            fillcolor='rgba(68, 68, 68, 0.2)',
            fill='tonexty')
        avg = go.Scatter(
            name=what_to_show[0],
            x=df_quantile["date"],
            y=df_quantile["avg"],
            mode='lines',
            line=dict(width=0.75, color='rgba(68, 68, 68, 0.8)'),
        )
        value_in_year__ = go.Scatter(
            name="2021",
            x=df_quantile["date"],
            y=df_quantile["value_in_year"],
            mode='lines',
            line=dict(width=0.75, color='rgba(255, 0, 0, 0.8)'),
        )
        q75 = go.Scatter(
            name='q75',
            x=df_quantile["date"],
            y=df_quantile['q75'],
            mode='lines',
            line=dict(width=0.5, color="rgba(255, 188, 0, 0.5)"),
            fillcolor='rgba(68, 68, 68, 0.1)',
            fill='tonexty')
        q90 = go.Scatter(
            name='q90',
            x=df_quantile["date"],
            y=df_quantile['q90'],
            mode='lines',
            line=dict(width=0.5, color="rgba(255, 188, 0, 0.5)"),
            fillcolor='rgba(68, 68, 68, 0.1)'
        )
        data = [q90, q75, q25, q10, avg, value_in_year__]
        layout = go.Layout(
            yaxis=dict(title=what_to_show[0]),
            title=title,
        )  # , xaxis=dict(tickformat="%d-%m")
        fig = go.Figure(data=data, layout=layout)
        fig.update_layout(xaxis=dict(tickformat="%d-%m"))
        st.plotly_chart(fig, use_container_width=True)
def main(): """ Main is responsible for the visualisation of everything connected with streamlit. It is the web application itself. """ # # Radiobuttons in one row # st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True) # Sets sidebar's header and logo sidebar.sidebar_head() # # # Spectrometer type `- BWTek / Renishaw / Witec / Wasatch / Teledyne # spectra_types = [ 'EMPTY', 'BWTEK', 'RENI', 'WITEC', 'WASATCH', 'TELEDYNE', 'JOBIN' ] spectrometer = st.sidebar.selectbox("Choose spectra type", spectra_types, format_func=LABELS.get, index=0) # sidebar separating line sidebar.print_widgets_separator() # User data loader # sidebar.print_widget_labels('Upload your data or try with ours', 10, 0) files = st.sidebar.file_uploader(label='Upload your data or try with ours', accept_multiple_files=True, type=['txt', 'csv']) # Allow example data loading when no custom data are loaded if not files: if st.sidebar.checkbox("Load example data"): if spectrometer == "EMPTY": st.sidebar.error('First Choose Spectra type') else: files = utils.load_example_files(spectrometer) # Check if data loaded, if yes, perform actions delim = None if files: st.spinner('Uploading data in progress') # sidebar separating line sidebar.print_widgets_separator() from detect_delimiter import detect new_files = [] for file in files: file.seek(0) lines = file.readlines() try: lines = [line.decode('utf-8') for line in lines] except AttributeError: pass # lines = str.splitlines(str(text)) # .split('\n') first_lines = '\n'.join(lines[:20]) delim = detect(first_lines) colnum = lines[-2].count(delim) lines = [i for i in lines if i.count(delim) == colnum] text = '\n'.join(lines) buffer = io.StringIO(text) buffer.name = file.name new_files.append(buffer) try: df = save_read.read_files(spectrometer, new_files, delim) except (TypeError, ValueError): st.error('Try choosing another type of spectra') st.stop() main_expander = st.beta_expander("Customize your chart") # Choose plot colors and templates with main_expander: plots_color, template = vis_utils.get_chart_vis_properties() # Select chart type chart_type = vis_opt.vis_options() # sidebar separating line sidebar.print_widgets_separator() # Select data conversion type spectra_conversion_type = vis_opt.convertion_opt() # TODO need improvements # getting rid of duplicated columns df = df.loc[:, ~df.columns.duplicated()] # # # data manipulation - raw / optimization / normalization # # TODO delete if not needed # Normalization # if spectra_conversion_type == LABELS["NORM"]: # df = (df - df.min()) / (df.max() - df.min()) # Mean Spectra if chart_type == 'MS': df = df.mean(axis=1).rename('Average').to_frame() # columns in main view. Chart, expanders # TODO rozwiązać to jakoś sprytniej normalized = False col_left, col_right = st.beta_columns([5, 2]) if spectra_conversion_type != "RAW": col_right = col_right.beta_expander("Customize spectra", expanded=False) with col_right: vals = data_customisation.get_deg_win(chart_type, spectra_conversion_type, df.columns) if st.checkbox("Data Normalization"): normalized = True df = (df - df.min()) / (df.max() - df.min()) else: normalized = False # For grouped spectra sometimes we want to shift the spectra from each other, here it is: with main_expander: # TODO the code below needed? 
# trick to better fit sliders in expander # _, main_expander_column, _ = st.beta_columns([1, 38, 1]) # with main_expander_column: shift_col, _, trim_col = st.beta_columns([5, 1, 5]) with shift_col: if chart_type == 'GS': shift = data_customisation.separate_spectra(normalized) elif chart_type == 'SINGLE': col = st.selectbox('spectrum to plot', df.columns) df = df[[col]] else: shift = None with trim_col: df = vis_utils.trim_spectra(df) # data conversion end if spectra_conversion_type in {'OPT'}: baselines = pd.DataFrame(index=df.index) baselined = pd.DataFrame(index=df.index) flattened = pd.DataFrame(index=df.index) for col in df.columns: baselines[col] = peakutils.baseline(df[col], vals[col][0]) baselined[col] = df[col] - baselines[col] flattened[col] = baselined[col].rolling(window=vals[col][1], min_periods=1, center=True).mean() # # # Plotting # # Groupped spectra if chart_type == 'GS': shifters = [(i + 1) * shift for i in range(len(df.columns))] plot_df = df if spectra_conversion_type == 'RAW' else flattened plot_df = plot_df + shifters figs = [ px.line(plot_df, x=plot_df.index, y=plot_df.columns, color_discrete_sequence=plots_color) ] # Mean spectra elif chart_type == 'MS': if spectra_conversion_type == 'RAW': plot_df = df figs = [ px.line(plot_df, x=plot_df.index, y=plot_df.columns, color_discrete_sequence=plots_color) ] elif spectra_conversion_type in {'OPT'}: columns = [ 'Average', 'Baseline', 'BL-Corrected', 'Flattened + BL-Corrected' ] plot_df = pd.concat([df, baselines, baselined, flattened], axis=1) plot_df.columns = columns fig1 = px.line(plot_df, x=plot_df.index, y=columns[-1], color_discrete_sequence=plots_color[3:]) fig2 = px.line(plot_df, x=plot_df.index, y=plot_df.columns, color_discrete_sequence=plots_color) figs = [(fig1, fig2)] else: raise ValueError( 'Unknown conversion type for Mean spectrum chart') # 3D spectra elif chart_type == 'P3D': plot_df = flattened if spectra_conversion_type in {"OPT"} else df plot_df = plot_df.reset_index().melt('Raman Shift', plot_df.columns) fig = px.line_3d(plot_df, x='variable', y='Raman Shift', z='value', color='variable') camera = dict(eye=dict(x=1.9, y=0.15, z=0.2)) fig.update_layout( scene_camera=camera, width=1200, height=1200, margin=dict(l=1, r=1, t=30, b=1), ) figs = [fig] # Single spectra elif chart_type == 'SINGLE': if spectra_conversion_type == 'RAW': plot_df = df figs = [ px.line(plot_df[col], color_discrete_sequence=plots_color) for col in plot_df.columns ] else: columns = [ 'Average', 'Baseline', 'BL-Corrected', 'Flattened + BL-Corrected' ] figs = [] plot_df = pd.concat([df, baselines, baselined, flattened], axis=1) plot_df.columns = columns fig1 = px.line(plot_df, x=plot_df.index, y=columns[-1], color_discrete_sequence=plots_color[3:] ) # trick for color consistency fig2 = px.line(plot_df, x=plot_df.index, y=plot_df.columns, color_discrete_sequence=plots_color) fig_tup = (fig1, fig2) figs.append(fig_tup) else: raise ValueError("Something unbelievable has been chosen") with col_left: charts.show_charts(figs, plots_color, template) with col_left: st.markdown('') link = utils.download_button(plot_df.reset_index(), f'spectrum.csv', button_text='Download CSV') st.markdown(link, unsafe_allow_html=True) else: manual.show_manual() authors.show_developers()
def getdata(stn, fromx, until): with st.spinner(f"GETTING ALL DATA ..."): # url = "https://www.daggegevens.knmi.nl/klimatologie/daggegevens?stns=251&vars=TEMP&start=18210301&end=20210310" # https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script # url = f"https://www.daggegevens.knmi.nl/klimatologie/daggegevens?stns={stn}&vars=ALL&start={fromx}&end={until}" url = f"https://www.daggegevens.knmi.nl/klimatologie/daggegevens?stns={stn}&vars=TEMP:SQ:SP:Q:DR:RH&start={fromx}&end={until}" try: df = pd.read_csv( url, delimiter=",", header=None, comment="#", low_memory=False, ) except: st.write("FOUT BIJ HET INLADEN.") st.stop() # TG : Etmaalgemiddelde temperatuur (in 0.1 graden Celsius) / Daily mean temperature in (0.1 degrees Celsius) # TN : Minimum temperatuur (in 0.1 graden Celsius) / Minimum temperature (in 0.1 degrees Celsius) # TNH : Uurvak waarin TN is gemeten / Hourly division in which TN was measured # TX : Maximum temperatuur (in 0.1 graden Celsius) / Maximum temperature (in 0.1 degrees Celsius) # TXH : Uurvak waarin TX is gemeten / Hourly division in which TX was measured # T10N : Minimum temperatuur op 10 cm hoogte (in 0.1 graden Celsius) / Minimum temperature at 10 cm above surface (in 0.1 degrees Celsius) # T10NH : 6-uurs tijdvak waarin T10N is gemeten / 6-hourly division in which T10N was measured; 6=0-6 UT; 12=6-12 UT; 18=12-18 UT; 24=18-24 UT # SQ : Zonneschijnduur (in 0.1 uur) berekend uit de globale straling (-1 voor <0.05 uur) / Sunshine duration (in 0.1 hour) calculated from global radiation (-1 for <0.05 hour) # SP : Percentage van de langst mogelijke zonneschijnduur / Percentage of maximum potential sunshine duration # Q : Globale straling (in J/cm2) / Global radiation (in J/cm2) # DR : Duur van de neerslag (in 0.1 uur) / Precipitation duration (in 0.1 hour) # RH : Etmaalsom van de neerslag (in 0.1 mm) (-1 voor <0.05 mm) / Daily precipitation amount (in 0.1 mm) (-1 for <0.05 mm) column_replacements = [ [0, "STN"], [1, "YYYYMMDD"], [2, "temp_avg"], [3, "temp_min"], [4, "temp_max"], [5, "T10N"], [6, "zonneschijnduur"], [7, "perc_max_zonneschijnduur"], [8, "glob_straling"], [9, "neerslag_duur"], [10, "neerslag_etmaalsom"], ] for c in column_replacements: df = df.rename(columns={c[0]: c[1]}) df["YYYYMMDD"] = pd.to_datetime(df["YYYYMMDD"], format="%Y%m%d") df["YYYY"] = df["YYYYMMDD"].dt.year df["MM"] = df["YYYYMMDD"].dt.month df["DD"] = df["YYYYMMDD"].dt.day df["dayofyear"] = df["YYYYMMDD"].dt.dayofyear df["count"] = 1 month_long_to_short = { "January": "Jan", "February": "Feb", "March": "Mar", "April": "Apr", "May": "May", "June": "Jun", "July": "Jul", "August": "Aug", "September": "Sep", "October": "Oct", "November": "Nov", "December": "Dec", } month_number_to_short = { "1": "Jan", "2": "Feb", "3": "Mar", "4": "Apr", "5": "May", "6": "Jun", "7": "Jul", "8": "Aug", "9": "Sep", "10": "Oct", "11": "Nov", "12": "Dec", } df["month"] = df["MM"].astype(str).map(month_number_to_short) df["year"] = df["YYYY"].astype(str) df["month"] = df["month"].astype(str) df["day"] = df["DD"].astype(str) df["month_year"] = df["month"] + " - " + df["year"] df["month_day"] = df["month"] + " - " + df["day"] to_divide_by_10 = [ "temp_avg", "temp_min", "temp_max", "zonneschijnduur", "neerslag_duur", "neerslag_etmaalsom", ] for d in to_divide_by_10: try: df[d] = df[d] / 10 except: df[d] = None return df, url
""" # DICOM Header Viewer with Filter This is a small example of the power of the `streamlit` library. ## Uploading a DICOM file Begin by uploading a DICOM file """ dicom_bytes = st.file_uploader("Upload DICOM file", encoding=None) if not dicom_bytes: raise st.stop() try: dicom_header = pydicom.read_file(dicom_bytes, force=True, stop_before_pixels=True) except: st.write(WrongFileType("Does not appear to be a DICOM file")) raise st.stop() """ ## Filtering and Viewing the DICOM header """ filter_string = st.text_input("Filter headers by typing here") view = dicom_header.__repr__().split("\n")
def write(self): st.title("Create") obj_type = st.selectbox("Select object to create", [CreatePage.COLL_OPT, CreatePage.DOC_OPT]) if obj_type == CreatePage.COLL_OPT: db_name = st.selectbox( "Select Database", self.db_client.get_database_names() + [CreatePage.CREATE_NEW_DB_OPT], ) if db_name == CreatePage.CREATE_NEW_DB_OPT: db_name = st.text_input("Database Name ?") if (db_name == "" and not any(char in db_name for char in {"$", "\\", "/", ".", " ", '"'}) and len(db_name) < 64): st.info( "Database Names cant be empty. cant have '$', '\\', '/', '.', 'space', '\"' (quotes)' and length must be less than 64" ) st.stop() coll_name = st.text_input("Collection Name ?") if coll_name == "" and not any(char in coll_name for char in {"$", "system."}): st.info( "Collection Names cant be empty. cant have '$' and cant have 'system.'" ) st.stop() if st.button("Create"): self.db_client.create_collection(db_name=db_name, coll_name=coll_name) elif obj_type == CreatePage.DOC_OPT: db_name = st.selectbox( "Select Database", self.db_client.get_database_names() + [CreatePage.CREATE_NEW_DB_OPT], ) coll_name = None if db_name == CreatePage.CREATE_NEW_DB_OPT: db_name = st.text_input("Database Name ?") if (db_name == "" and not any(char in db_name for char in {"$", "\\", "/", ".", " ", '"'}) and len(db_name) < 64): st.info( "Database Names cant be empty. cant have '$', '\\', '/', '.', 'space', '\"' (quotes)' and length must be less than 64" ) st.stop() coll_name = st.text_input("Collection Name ?") if coll_name == "" and not any(char in coll_name for char in {"$", "system."}): st.info( "Collection Names cant be empty. cant have '$' and cant have 'system.'" ) st.stop() else: coll_name = st.selectbox( "Select Collection", self.db_client.get_collection_names(db_name=db_name) + [CreatePage.CREATE_NEW_COLL_OPT], ) if coll_name == CreatePage.CREATE_NEW_COLL_OPT: coll_name = st.text_input("Collection Name ?") if coll_name == "" and not any(char in coll_name for char in {"$", "system."}): st.info( "Collection Names cant be empty. cant have '$' and cant have 'system.'" ) st.stop() document = st.text_area("Document(s) data ?") try: document = json.loads(document) except JSONDecodeError: st.warning("Must be a valid JSON.") st.stop() if st.button("Create"): self.db_client.insert_docs(db_name, coll_name, document) see_doc = st.checkbox("See Document(s) ?") if see_doc: st.write(document)
def make_age_graph(df, d, columns_original, legendanames, titel): if d is None: st.warning("Choose ages to show") st.stop() with _lock: color_list = [ "#3e5c76", # blue 6, "#ff6666", # reddish 0 "#ac80a0", # purple 1 "#3fa34d", # green 2 "#EAD94C", # yellow 3 "#EFA00B", # orange 4 "#7b2d26", # red 5 "#e49273", # dark salmon 7 "#1D2D44", # 8 "#02A6A8", "#4E9148", "#F05225", "#024754", "#FBAA27", "#302823", "#F07826", ] # df = agg_ages(df) fig1y, ax = plt.subplots() for i, d_ in enumerate(d): #if d_ == "TOTAAL_index": if d_[:6] == "TOTAAL": ax.plot(df["Date_of_statistics_week_start"], df[d_], color=color_list[0], label=columns_original[i], linestyle="--", linewidth=2) ax.plot(df["Date_of_statistics_week_start"], df[columns_original[i]], color=color_list[0], alpha=0.5, linestyle="dotted", label='_nolegend_', linewidth=2) else: ax.plot(df["Date_of_statistics_week_start"], df[d_], color=color_list[i + 1], label=columns_original[i]) ax.plot(df["Date_of_statistics_week_start"], df[columns_original[i]], color=color_list[i + 1], alpha=0.5, linestyle="dotted", label='_nolegend_') plt.legend() if y_zero == True: ax.set_ylim(bottom=0) titel_ = titel + " (weekcijfers)" plt.title(titel_) plt.xticks(rotation=270) ax.text( 1, 1.1, "Created by Rene Smit — @rcsmit", transform=ax.transAxes, fontsize="xx-small", va="top", ha="right", ) # plt.tight_layout() # plt.show() st.pyplot(fig1y)
def main(): if platform.processor() != "": arr = os.listdir( "C:\\Users\\rcxsm\\Documents\\phyton_scripts\\streamlit_scripts") else: arr = os.listdir() counter = 1 options = [["0. welcome", "welcome"], ["1. newagebullshitgenerator", "newagebullshitgenerator"], ["2. KNMI grafieken", "show_knmi"], ["3. Text generator", "txt_generator_streamlit"], ["4. YT transcriber", "YoutubeTranscriber_streamlit"], ["5. Schoonmaaktijden", "schoonmaaktijden"], ["6. Show sportactivities", "show_sportactivities"], ["7. YFinance info", "yfinance_info"], ["8. Crypto portfolio", "crypto_portfolio"], ["9. strftime_test", "strftime_test"]] # for file in arr: # if file[-2:] =="py" and ( file != "welcome.py" and file !="menu_streamlit.py"): # menutext = f"{counter}. {file}" # menutext = menutext.replace("_"," ") # I was too lazy to change it in the list # menutext = menutext.replace(".py","") # I was too lazy to change it in the list # file_ = file.replace(".py","") # I was too lazy to change it in the list # options.append([menutext, file_]) # counter +=1 query_params = st.experimental_get_query_params( ) # reading the choice from the URL.. choice = int( query_params["choice"][0] ) if "choice" in query_params else 0 # .. and make it the default value menuchoicelist = [options[n][0] for n, l in enumerate(options)] with st.sidebar.expander( 'MENU: Choose a script | scroll down for options/parameters', expanded=True): menu_choice = st.radio("", menuchoicelist, index=choice) st.sidebar.markdown("<h1>- - - - - - - - - - - - - - - - - - </h1>", unsafe_allow_html=True) st.experimental_set_query_params(choice=menuchoicelist.index( menu_choice)) # setting the choice in the URL for n, l in enumerate(options): if menu_choice == options[n][0]: if platform.processor() != "": m = "C:\\Users\\rcxsm\\Documents\\phyton_scripts\\streamlit_scripts\\" + options[ n][1].replace( " ", "_") # I was too lazy to change it in the list st.write(f"{m }") else: m = options[n][1].replace( " ", "_") # I was too lazy to change it in the list try: module = dynamic_import(m) except Exception as e: st.error(f"Module '{m}' not found or error in the script\n") st.warning(f"{e}") st.warning(traceback.format_exc()) st.stop() try: module.main() except Exception as e: st.error( f"Function 'main()' in module '{m}' not found or error in the script" ) st.warning(f"{e}") st.warning(traceback.format_exc()) st.stop()
def main(): lijst = [ "0-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90+", "Unknown", "0-29", "30-49", "50-69", "70-89", "90+", "30-69", "0-39", "40-59", "60-79", "80+", "0-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90+", "TOTAAL" ] population = [ 2707000, 1029000, 1111000, 1134000, 1124000, 1052000, 1033000, 1131000, 1285000, 1263000, 1138000, 1003000, 971000, 644000, 450000, 259000, 130000, 10, 5981000, 4340000, 4689000, 2_324_000, 130000, 9029000, 8157000, 4712000, 3756000, 839000, # 0-9 10-19 20-29 30-39 40-49 1756000, 1980000, 2245000, 2176000, 2164000, #50-59 60-69 70-79 80-89 90+ 2548000, 2141000, 1615000, 709000, 130000, 17464000 ] # tot 17 464 000 st.header("Hospital / ICU admissions in the Netherlands") st.subheader("Please send feedback to @rcsmit") # DAILY STATISTICS ################ start_ = "2020-01-01" today = datetime.today().strftime("%Y-%m-%d") global from_, FROM, UNTIL from_ = st.sidebar.text_input("startdate (yyyy-mm-dd)", start_) try: FROM = dt.datetime.strptime(from_, "%Y-%m-%d").date() except: st.error( "Please make sure that the startdate is valid and/or in format yyyy-mm-dd" ) st.stop() until_ = st.sidebar.text_input("enddate (yyyy-mm-dd)", today) try: UNTIL = dt.datetime.strptime(until_, "%Y-%m-%d").date() except: st.error("Please make sure that the enddate is in format yyyy-mm-dd") st.stop() if FROM >= UNTIL: st.warning("Make sure that the end date is not before the start date") st.stop() if until_ == "2023-08-23": st.sidebar.error("Do you really, really, wanna do this?") if st.sidebar.button("Yes I'm ready to rumble"): caching.clear_cache() st.success("Cache is cleared, please reload to scrape new values") global WDW2 WDW2 = st.sidebar.slider("Window smoothing curves (weeks)", 1, 8, 1) global delete_last_row delete_last_row = st.sidebar.selectbox( "Delete last week/row of complete dataset", [True, False], index=0) df_pivot_hospital, df_pivot_ic = prepare_data() df_pivot_hospital = select_period(df_pivot_hospital, "Date_of_statistics_week_start", FROM, UNTIL) df_pivot_ic = select_period(df_pivot_ic, "Date_of_statistics_week_start", FROM, UNTIL) df_pivot_hospital_basic = df_pivot_hospital.copy(deep=False) df_pivot_ic_basic = df_pivot_ic.copy(deep=False) df_pivot_hospital = agg_ages(df_pivot_hospital) df_pivot_ic = agg_ages(df_pivot_ic) save_df(df_pivot_hospital, "hospital_voor_maarten") save_df(df_pivot_ic, "ic_voor_maarten") df_pivot_casus_landelijk_per_week = make_pivot_casus_landelijk_per_week() save_df(df_pivot_casus_landelijk_per_week, "casus_per_age_per_week_voor_maarten") hospital_or_ic = st.sidebar.selectbox("Hospital or IC", ["hospital", "icu"], index=0) what_to_do = st.sidebar.selectbox("What type of graph", ["stack", "line"], index=1) default_age_groups = ["0-29", "30-49", "50-69", "70-89", "90+"] default_age_groups_perc = [ "0-29_perc", "30-49_perc", "50-69_perc", "70-89_perc", "90+_perc" ] default_age_groups_cumm_all = [ "0-29_cumm_all", "30-49_cumm_all", "50-69_cumm_all", "70-89_cumm_all", "90+_cumm_all" ] default_age_groups_cumm_period = [ "0-29_cumm_period", "30-49_cumm_period", "50-69_cumm_period", "70-89_cumm_period", "90+_cumm_period" ] default_age_groups_per_capita = [ "0-29_per_capita", "30-49_per_capita", "50-69_per_capita", "70-89_per_capita", "90+_per_capita" ] if what_to_do == "line": age_groups = ["0-29", "30-49", "50-69", "70-89", "90+", "TOTAAL"] absolute_or_index = st.sidebar.selectbox( f"Absolute | percentages of 
TOTAAL |\n index (start = 100) | per capita | cummulatief from 2020-1-1 | cummulatief from {FROM}", [ "absolute", "percentages", "index", "per_capita", "cummulatief_all", "cummulatief_period" ], index=0) normed = absolute_or_index == "index" if absolute_or_index == "percentages": ages_to_show = st.sidebar.multiselect( "Ages to show (multiple possible)", lijst_perc, default_age_groups_perc) elif absolute_or_index == "cummulatief_all": ages_to_show = st.sidebar.multiselect( "Ages to show (multiple possible)", lijst_cumm_all, default_age_groups_cumm_all) elif absolute_or_index == "cummulatief_period": ages_to_show = st.sidebar.multiselect( "Ages to show (multiple possible)", lijst_cumm_period, default_age_groups_cumm_period) elif absolute_or_index == "per_capita": ages_to_show = st.sidebar.multiselect( "Ages to show (multiple possible)", lijst_per_capita, default_age_groups_per_capita) else: # absolute ages_to_show = st.sidebar.multiselect( "Ages to show (multiple possible)", lijst, default_age_groups) else: #stackplot absolute_or_relative = st.sidebar.selectbox( "Absolute or relative (total = 100%)", ["absolute", "relative"], index=0) ages_to_show = st.sidebar.multiselect( "Ages to show (multiple possible)", lijst, default_age_groups) if len(ages_to_show) == 0: st.warning("Choose ages to show") st.stop() global y_zero y_zero = st.sidebar.selectbox("Y-ax starts at 0", [True, False], index=1) if what_to_do == "stack": # SHOW STACKGRAPHS if hospital_or_ic == "hospital": to_do_stack = [[ df_pivot_hospital, ages_to_show, "ziekenhuisopname naar leeftijd" ]] else: to_do_stack = [[ df_pivot_ic, ages_to_show, "IC opname naar leeftijd" ]] for d in to_do_stack: show_stack(d[0], d[1], d[2], absolute_or_relative) elif what_to_do == "line": # SHOW LINEGRAPHS if normed == True: df_pivot_hospital, d = normeren(df_pivot_hospital, ages_to_show) df_pivot_ic, d = normeren(df_pivot_ic, ages_to_show) else: d = ages_to_show if hospital_or_ic == "hospital": show_age_graph(df_pivot_hospital, d, "ziekenhuisopnames") else: show_age_graph(df_pivot_ic, d, "IC opnames") else: st.error("ERROR") st.stop if hospital_or_ic == "hospital": st.subheader("Ziekenhuisopnames (aantallen)") st.write(df_pivot_hospital_basic) df_new = do_the_rudi(df_pivot_hospital_basic) st.write( df_new.style.format( None, na_rep="-").applymap(color_value).set_precision(2)) #st.dataframe(df_new.style.applymap(color_value)) else: st.subheader("Ziekenhuisopnames (aantallen)") st.write(df_pivot_ic_basic) df_new = do_the_rudi(df_pivot_ic_basic) st.dataframe(df_new.style.applymap(color_value)) tekst = ( "<style> .infobox { background-color: lightblue; padding: 5px;}</style>" "<hr><div class='infobox'>Made by Rene Smit. 
(<a href='http://www.twitter.com/rcsmit' target=\"_blank\">@rcsmit</a>) <br>" 'Data source : <a href="https://data.rivm.nl/covid-19/COVID-19_ziekenhuis_ic_opnames_per_leeftijdsgroep.csv" target="_blank">RIVM</a> (daily retrieved)<br>' 'Sourcecode : <a href="https://github.com/rcsmit/COVIDcases/edit/main/plot_hosp_ic_streamlit.py" target="_blank">github.com/rcsmit</a><br>' 'How-to tutorial : <a href="https://rcsmit.medium.com/making-interactive-webbased-graphs-with-python-and-streamlit-a9fecf58dd4d" target="_blank">rcsmit.medium.com</a><br>' ) st.sidebar.markdown(tekst, unsafe_allow_html=True) st.markdown("<hr>", unsafe_allow_html=True) st.image( "https://raw.githubusercontent.com/rcsmit/COVIDcases/main/buymeacoffee.png" ) st.markdown( '<a href="https://www.buymeacoffee.com/rcsmit" target="_blank">If you are happy with this dashboard, you can buy me a coffee</a>', unsafe_allow_html=True, )
def handle_hop_url(url_params, pathSession): '''Handle table display associated with hop''' namespace = url_params.get('namespace', [""])[0] hostname = url_params.get('hostname', [""])[0] if not hostname: st.error('No hostname found to display information for') st.stop() st.header(f'Debug Tables for Path from {pathSession.source} to ' f'{pathSession.dest}') pathobj = getattr(pathSession, 'pathobj', None) df = getattr(pathSession, 'path_df', None) engobj = pathobj.engine_obj if df.empty: st.warning('Empty path dataframe') st.stop() host_dfg = df.query(f'hostname == "{hostname}"') \ .groupby(by=['hopCount']) df2 = host_dfg.agg({ 'vrf': ['unique'], 'ipLookup': ['unique'], 'nexthopIp': ['unique'], 'oif': ['unique'], 'macLookup': ['unique'], 'vtepLookup': ['unique'] }).reset_index() df2.columns = [ 'hopCount', 'vrf', 'ipLookup', 'nexthopIp', 'oif', 'macaddr', 'vtepLookup' ] df2 = df2.explode('hopCount').explode('vrf').explode('ipLookup') \ .explode('macaddr') \ .explode('vtepLookup') df2.drop_duplicates(subset=['vrf', 'ipLookup'], inplace=True) for row in df2.itertuples(): with st.beta_expander( f'Lookups on {hostname}, for hopcount: ' f'{row.hopCount}', expanded=True): if row.macaddr: st.info(f'MAC Table on {hostname}, MAC addr {row.macaddr}') st.dataframe(data=engobj._macsobj.get(namespace=namespace, hostname=hostname, macaddr=row.macaddr)) continue if (row.ipLookup != row.vtepLookup): st.info(f'Route Lookup on {hostname}') st.dataframe(data=engobj._rdf.query( f'hostname=="{hostname}" and vrf=="{row.vrf}"')) if row.vtepLookup: st.info(f'Underlay Lookup on {hostname} for {row.vtepLookup}') vtepdf = engobj._underlay_dfs.get(row.vtepLookup, pd.DataFrame()) if not vtepdf.empty: st.dataframe(data=vtepdf.query( f'hostname=="{hostname}" and vrf=="default"')) oifs = row.oif.tolist() nhops = row.nexthopIp.tolist() prev_nhop = '' for oif, nhop in zip_longest(oifs, nhops): blank1, arpcol = st.beta_columns([1, 40]) blank2, ifcol = st.beta_columns([2, 40]) # this logic because I don't know what fn to use with agg above # to not remove non-unique nhop. if not nhop and prev_nhop: nhop = prev_nhop else: prev_nhop = nhop arpdf = engobj._arpnd_df.query(f'hostname=="{hostname}" and ' f'ipAddress=="{nhop}" and ' f'oif=="{oif}"') with arpcol: st.info(f'ARP/ND Lookup on {hostname} for {nhop}') st.dataframe(data=arpdf, height=100) if not arpdf.empty: if ':' in nhop: dropcol = ['ipAddressList'] else: dropcol = ['ip6AddressList'] if nhop == '169.254.0.1': macaddr = arpdf.macaddr.iloc[0] if_df = engobj._if_df.query(f'macaddr=="{macaddr}"') \ .drop(columns=dropcol) label = f'matching nexthop {nhop}, macaddr {macaddr}' else: if_df = engobj._if_df.drop(columns=dropcol) label = f'matching nexthop {nhop}' else: label = f'matching nexthop {nhop}' if_df = engobj._if_df if ':' in nhop: s = if_df.ip6AddressList \ .explode() \ .str.startswith(f'{nhop}/').dropna() s = s.loc[s == True] if_df = if_df.iloc[s.loc[s == True].index] elif nhop != '169.254.0.1': s = if_df.ipAddressList \ .explode() \ .str.startswith(f'{nhop}/').dropna() s = s.loc[s == True] if_df = if_df.iloc[s.loc[s == True].index] with ifcol: st.info(f'Interfaces {label}') st.dataframe(data=if_df, height=600) st.markdown("<hr>", unsafe_allow_html=True)
def main(): df = read() acco_codes = ["all", "w", "sa", "se", "k", "b"] acco_names = ["All", "Waikiki", "Sahara", "Serengeti", "Kalahari", "Bali"] # distributions = ["weibull_min", "exponweib"] # distribution_to_use = st.sidebar.selectbox( # "Which distribution to use", # distributions, # index=0) # exponweib doesnt work properly distribution_to_use = "weibull_min" # distribution_to_use = "exponweib" st.title(f"Schoonmaaktijden gefit aan Weibull verdeling") menu_choice = st.sidebar.radio( "", ["ALL", "animated", "never cleaned", "edit sheet", "show formulas"], index=0) binwidth = st.sidebar.slider("Binwidth", 1, 20, 6) st.sidebar.write( "Attention: Guests are supposed to leave the accomodation clean behind as they found it. These cleaning times are in fact 'make perfect'-times !" ) st.sidebar.write( "Google sheet : https://docs.google.com/spreadsheets/d/1Lqddg3Rsq0jhFgL5U-HwvDdo0473QBZtjbAp9ol8kcg/edit#gid=0" ) st.sidebar.write( "Broncode : https://github.com/rcsmit/streamlit_scripts/schoonmaaktijden.py" ) if menu_choice == "ALL": show_various_plots(df, acco_codes, acco_names, distribution_to_use, binwidth) elif menu_choice == "edit sheet": edit_sheet() elif menu_choice == "never cleaned": check_accos_never_cleaned(df) elif menu_choice == "animated": show_animation(df, acco_codes, acco_names, distribution_to_use, binwidth) elif menu_choice == "show formulas": st.header("Formulas") #st.write ("distribution: y = (shape/scale) * ((x/scale)**(shape - 1)) * np.exp(-1*((x/scale)**shape)) ") st.write( "PDF - probability density function : y = (shape/scale) * ((x/scale)**(shape - 1)) * np.exp(-1*((x/scale)**shape))" ) st.write( "CDF - cummulative distribution function: y = 1 - (np.exp(- (x/scale)**shape))" ) st.subheader( "From percentage to time (x % of the cleans is under y minutes)") st.write( "PPF - Percentual point function: q = 1-p | y = scale * (-1 * np.log(q))**(1/shape)" ) st.subheader("Discrete / steps") st.write( "PMF - probability mass function : a = np.exp(-1*(x/scale)**shape) | b =np.exp(-1*((x+step)/scale)**shape) | y = a-b" ) st.write( "CDF - Cummulative distribution function : b =np.exp(-1*((x+1)/scale)**shape) | y = (1-b)" ) st.subheader("Various") st.write("cumm_hazard : y = (x/scale)**shape") st.write( "mean : n = (1+ (1/shape)) | gamma = math.gamma(n) | y = scale*gamma" ) st.write( "pdf_not_used : x_min_1 = 1-np.exp(-1*((x-1/scale)**shape)) | xx = 1-np.exp(-1*((x/scale)**shape))| y = (x_min_1 - xx)" ) st.subheader("Extra info") st.write( " the shape parameter describes the shape of your data’s distribution. Statisticians also refer to it as the Weibull slope because its value equals the slope of the line on a probability plot. Shape value of 2 equals a Rayleigh distribution, which is equivalent to a Chi-square distribution with two degrees of freedom. Shape value near of 3 approximates the normal distribution" ) st.write( "The scale parameter represents the variability present in the distribution. The value of the scale parameter equals the 63.2 percentile in the distribution. 63.2% of the values in the distribution are less than the scale value." 
) st.write( "https://statisticsbyjim.com/probability/weibull-distribution/") st.subheader("Links") st.write( "https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.weibull_min.html" ) st.write( "https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Weibull.html" ) st.write( "https://www.sciencedirect.com/topics/computer-science/weibull-distribution" ) st.write( "https://www.itl.nist.gov/div898/handbook/eda/section3/eda3668.htm" ) else: st.write("ËRROR") st.stop()
def discovery_page(state): logger.info({"message": "Loading Intents Discovery page."}) st.title("Intents Discovery") st.markdown(""" When you start to develop your chatbot or even you are developing more intents, you will need to analyze user messages to map the new intents. This activity is a hard work task for each chatbot developer, but here we have a method to help you. We'll use an unsupervised learning algorithm that will try to cluster user messages into topics. The process is done in two stages: - Find the best number of clusters (n_clusters); - Run the unsupervised algorithm with the best number of clusters. """) st.image('images/clusters.gif') st.write( "Your job will be analyze the examples of each topic and understand if these examples should be a new intent or not." ) # Initialize unlabeled_examples unlabeled_examples = None sim_option = st.radio('Where do we get unlabeled messages?', options=["Watson Assistant", "Import file"]) if sim_option == "Import file": st.markdown(""" File format ``` I want to make a request how to cancel an order I need to schedule a visit ... ``` """) uploaded_file = st.file_uploader("Attach file", type=["csv", "xlsx"]) if uploaded_file is not None: df = read_df(uploaded_file, cols_names=["examples"]) unlabeled_examples = df["examples"].tolist() elif sim_option == "Watson Assistant": if st.button("Get logs"): # Getting Watson logs st.write("Loading Watson Assistant logs.") from src.connectors.watson_assistant import WatsonAssistant wa = WatsonAssistant( apikey=state.watson_args["apikey"], service_endpoint=state.watson_args["endpoint"], default_skill_id=state.watson_args["skill_id"]) logs = wa.get_logs() if len(logs) > 0: state.discovery_data = pd.DataFrame(prepare_logs(logs)) else: logger.error({ "message": "It's seems that this skill has no logs available." 
}) st.error("It's seems that this skill has no logs available.") st.stop() if isinstance(state.discovery_data, pd.DataFrame): if len(state.discovery_data) > 0: data = state.discovery_data max_words = data["input_words"].max() min_words = data["input_words"].min() sliders = { "confidence": st.slider('Confidence', min_value=0.0, max_value=1.0, value=(0.3, 0.6), step=0.01), "input_words": st.slider('Input words', min_value=max_words, max_value=max_words, value=(min_words, max_words), step=1) } data = data[(data["confidence"] >= sliders["confidence"][0]) & (data["confidence"] <= sliders["confidence"][1]) & (data["input_words"] >= sliders["input_words"][0]) & (data["input_words"] <= sliders["input_words"][1])] st.write("Selected messages: {}".format(len(data))) st.write(data) unlabeled_examples = data["input"].tolist() if unlabeled_examples != None: if st.button("Run analysis"): st.write("## Working on the data") st.write("We are preparing the data, this may take some time.") # imports import plotly.express as px from src.intents.discovery import IntentsDiscovery # Instantiate an object of IntentsDiscovery class intents_discovery = IntentsDiscovery(data=unlabeled_examples, spacy_model=state.spacy_model) # Apply preprocessing on dataset if isinstance(state.stopwords, list): intents_discovery.text_processing(stopwords=state.stopwords, inplace=True) # Find best n_clusters st.write("Starting tests to find the best `n_clusters`.") intents_discovery.search_n_clusters() clustering_data = intents_discovery.clustering( n_clusters=intents_discovery.n_clusters) df = pd.DataFrame({ "examples": clustering_data["data"], "labels": clustering_data["labels"] }) st.markdown(""" ## Silhouette score To evaluate how the unsupervised model is performing, we’ll use [Silhouette](https://en.wikipedia.org/wiki/Silhouette_(clustering)) score. """) df_score = pd.DataFrame(intents_discovery.search_data) st.plotly_chart(px.line(df_score, x="n_clusters", y="silhouette_score", title="Silhouette score"), use_container_width=True) st.markdown(""" ## Clustered messages See below the clustered messages or download it as csv file. """) link = download_link(df, "clustered_examples.csv", "Download CSV file") st.markdown(link, unsafe_allow_html=True) st.dataframe(df) st.markdown(""" ## Topics Below we can see the topics that were found. """) df_topics = df.groupby("labels").count() df_topics.sort_values("examples", inplace=True, ascending=True) df_topics.reset_index(inplace=True) fig_title = "{} topics for {} messages.".format( len(df_topics), df_topics["examples"].sum()) fig = px.bar(df_topics, x="examples", y="labels", orientation="h", hover_name="labels", hover_data=["labels"], title=fig_title) fig.layout.update(showlegend=False) st.plotly_chart(fig, use_container_width=True) state.sync()
def main(): # online version : https://data.rivm.nl/covid-19/COVID-19_casus_landelijk.csv df_getdata = load_data() df = df_getdata.copy( deep=False ) # prevent an error [Return value of `prepare_data()` was mutated between runs.] start_ = "2021-05-01" today = datetime.today().strftime("%Y-%m-%d") global from_, FROM, UNTIL from_ = st.sidebar.text_input("startdate (yyyy-mm-dd)", start_) try: FROM = dt.datetime.strptime(from_, "%Y-%m-%d").date() except: st.error( "Please make sure that the startdate is valid and/or in format yyyy-mm-dd" ) st.stop() until_ = st.sidebar.text_input("enddate (yyyy-mm-dd)", today) try: UNTIL = dt.datetime.strptime(until_, "%Y-%m-%d").date() except: st.error("Please make sure that the enddate is in format yyyy-mm-dd") st.stop() if FROM >= UNTIL: st.warning("Make sure that the end date is not before the start date") st.stop() if until_ == "2023-08-23": st.sidebar.error("Do you really, really, wanna do this?") if st.sidebar.button("Yes I'm ready to rumble"): caching.clear_cache() st.success("Cache is cleared, please reload to scrape new values") df.rename( columns={ "Date_file": "count", }, inplace=True, ) # df_hospital = df[df["Hospital_admission"] == "Yes"].copy(deep=False) # df_deceased = df[df["Deceased"] == "Yes"].copy(deep=False) df = select_period(df, "Date_statistics", FROM, UNTIL) df_pivot = (pd.pivot_table( df, values="count", index=["Date_statistics"], columns=["Agegroup"], aggfunc=np.sum, ).reset_index().copy(deep=False)) df_pivot["TOTAAL"] = df_pivot["0-9"] + df_pivot["10-19"] + df_pivot[ "20-29"] + df_pivot["30-39"] + df_pivot["40-49"] + df_pivot[ "50-59"] + df_pivot["60-69"] + df_pivot["70-79"] + df_pivot[ "80-89"] + df_pivot["90+"] # df_pivot_hospital = ( # pd.pivot_table( # df_hospital, # values="count", # index=["Date_statistics"], # columns=["Agegroup"], # aggfunc=np.sum, # ) # .reset_index() # .copy(deep=False) # ) # df_pivot_deceased = ( # pd.pivot_table( # df_deceased, # values="count", # index=["Date_statistics"], # columns=["Agegroup"], # aggfunc=np.sum, # ) # .reset_index() # .copy(deep=False) # ) #df_pivot = df_pivot.add_prefix("pos_test_") # df_pivot_hospital = df_pivot_hospital.add_prefix("hosp_") # save_df(df_pivot_hospital, "df_hospital_per_dag") # df_pivot_deceased = df_pivot_deceased.add_prefix("deceased_") # print(df_pivot_deceased.dtypes) todrop = [ "Date_statistics_type", "Sex", "Province", "Hospital_admission", "Deceased", "Week_of_death", "Municipal_health_service", ] df = drop_columns(df, todrop) # save_df(df, "landelijk_leeftijd_2") # save_df(df_pivot, "landelijk_leeftijd_pivot") #save_df(df_pivot_hospital, "landelijk_leeftijd_pivot_hospital") #save_df(df_pivot_deceased, "landelijk_leeftijd_pivot_deceased") # df_temp = pd.merge( # df_pivot, # df_pivot_hospital, # how="outer", # left_on="pos_test_Date_statistics", # right_on="hosp_Date_statistics", # ) # df_temp = pd.merge( # df_temp, # df_pivot_deceased, # how="outer", # left_on="pos_test_Date_statistics", # right_on="deceased_Date_statistics", # ) #df_temp_per_week = df_temp.groupby(pd.Grouper(key='pos_test_Date_statistics', freq='W')).sum() #df_temp_per_week.index -= pd.Timedelta(days=6) #print(df_temp_per_week) #df_temp_per_week["weekstart"]= df_temp_per_week.index #save_df(df_temp, "final_result") #save_df(df_temp_per_week, "final_result_per_week") lijst = [ "0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90+", "TOTAAL" ] ages_to_show = st.sidebar.multiselect("Ages to show (multiple possible)", lijst, lijst) global WDW2 df = 
df_pivot.copy(deep=False) t = "SMA" tg = st.sidebar.slider("Generation time", 1, 7, 4) d = st.sidebar.slider("Look back how many days", 1, 14, 7) WDW2 = st.sidebar.slider("Window smoothing curves (days)", 1, 45, 7) centersmooth = st.sidebar.selectbox("Smooth in center", [True, False], index=1) df, smoothed_columns = smooth_columnlist(df, ages_to_show, t, WDW2, centersmooth) df, column_list_r_smoothened = add_walking_r(df, smoothed_columns, "Date_statistics", t, WDW2, tg, d) make_age_graph(df, column_list_r_smoothened, lijst, "R getal naar leeftijd") st.write( "Attentie: DIt is het R getal op basis van moment van rapportage. RIVM berekent het R getal over het moment van besmetting of eerste symptomen" )
def main(): state.re_init(current_mod_name) if st.checkbox("显示说明", value=True): st.markdown(show_describtion()) state.func2_app['answer_dir'] = st.text_input( "输入标准答案样本的目录", state.func2_app['answer_dir']) state.func2_app['pending_dir'] = st.text_input( "输入待查答案样本的目录", state.func2_app['pending_dir']) show_widget_1 = st.empty() if not (os.path.exists(state.func2_app['answer_dir']) and os.path.exists(state.func2_app['pending_dir'])): show_widget_1.warning('xml 目录不存在') else: small_thr = None ignore_small = False if st.checkbox('启用宽容小目标模式'): if st.checkbox('启用忽略小目标'): ignore_small = True st.markdown(''' 宽容小目标模式下,对于尺寸小于预设值的目标,在计算iou指标的时候会降低, \n 具体计算方式是在当前阈值下-0.3 和0.2取最大值 \n 若启用忽略小目标,那么小于尺寸的目标将不再考虑 ''') small_thr = st.number_input(' 最小面积比例(%)', min_value=1.0, max_value=100.0, value=5.0, step=0.1) iou_thr = show_widget_1.slider('请选择IOU阈值:', min_value=0.0, max_value=1.0, value=0.5, step=0.1) show_widget_2 = st.info('计算结果中!') print('current count dir is {}'.format(state.func2_app['pending_dir'])) sub_pend_dirs = utils.get_sub_dir(state.func2_app['pending_dir']) print('sub_dir get done!', sub_pend_dirs) if not sub_pend_dirs: show_widget_2.text(' 待查目录子目录为空,请组织成正确的目录结构') st.stop() show_widget_2.text('获取子目录') result_dict = {} ans_xml_dir = utils.get_son_dir(state.func2_app['answer_dir']) for sub_dir in sub_pend_dirs: print('count!!!!') show_widget_2.info('{}结果计算中'.format(sub_dir)) result = deal_one_sub_pend_dir(sub_dir, ans_xml_dir, iou_thr, small_thr, ignore_small, placeholder_widget=show_widget_2) result_dict[sub_dir] = result result_str = show_result(result_dict) show_widget_2.markdown(result_str) # 显示图片 if st.checkbox(' 是否绘制图片有问题的结果', value=False): persons = {x.split(os.path.sep)[-1]: x for x in result_dict.keys()} current_preson = st.radio("当前显示结果的文件夹", tuple(persons.keys())) current_preson = persons[current_preson] _, wrong_labe_result, _, unaccurate_result, _, _, _, _ = result_dict[ current_preson] wl_show_dict = deal_show_result(wrong_labe_result) ua_show_dict = deal_show_result(unaccurate_result) st.header('显示错误标签的图片') show_pair_result(wl_show_dict) if repr(wl_show_dict) == repr(ua_show_dict): st.text('错误标签和没框准的图片一样,这里不再显示没框准的图片') else: st.header('显示没框准的图片') show_pair_result(ua_show_dict) if st.checkbox('是否导出结果', value=True): save_dir = st.text_input(' 结果保存的目录') if os.path.exists(save_dir): export_result(result_dict, save_dir) st.write('导出完毕')
def app(): # st.title("Home") st.title('Analyze End User License Agreement (EULA) Clauses using Interpretable Machine Learning') st.subheader("Developed by Andrew Mendez") @st.cache def get_pdf(filepath): pages_pdf = extract_clauses_from_pdf(filepath) clauses_pdf = preprocess_clauses_pdf(pages_pdf) return clauses_pdf @st.cache def get_docx(filepath): return get_text_from_docx(filepath) @st.cache def get_predictions(clauses): with st.spinner("Loading Model and predicting clause acceptabilty..."): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') PATH_TO_MODEL = '/Users/andrewmendez1/Documents/ai-ml-challenge-2020/data/Finetune BERT oversampling 8_16_2020/Model_1_4_0' best_model,tokenizer = load_model_and_tokenizer(PATH_TO_MODEL,device) ref_token_id = tokenizer.pad_token_id # A token used for generating token reference sep_token_id = tokenizer.sep_token_id # A token used as a separator between question and text and it is also added to the end of the text. cls_token_id = tokenizer.cls_token_id # A token used for prepending to the concatenated question-text word sequence print(device) acceptable_eulas = [] unacceptable_eulas = [] num_pos = 0 for clause in clauses: pred,confidence = get_prediction_and_confidence(best_model,clause,tokenizer,device,ref_token_id,sep_token_id,cls_token_id) if int(pred)==0: num_pos+=1 acceptable_eulas.append([clause,int(pred),float(confidence)]) else: unacceptable_eulas.append([clause,int(pred),float(confidence)]) return acceptable_eulas,unacceptable_eulas, num_pos st.header("Welcome to the EULA Analyzer. This website allows you analyze EULA documents to determine whether terms and conditions are acceptable to the government.") st.write(" ") st.subheader("To get started, enter the file path for a PDF or Word document. Then, press Enter to upload.") filename = st.text_input(r'Example on Mac - /Users/amendez/MyDocuments/EULAS/testEULA.docx ; Example on Windows - C:\Users\amendez\MyDocuments\EULAS\testEULA.docx') if not filename: st.warning('Please upload file') st.stop() ext = os.path.splitext(filename)[1] name = filename.split("/")[-1] if ext == '.pdf': with st.spinner("Extracting clauses from .pdf..."): clauses = get_pdf(filename) elif ext == '.docx': with st.spinner("Extracting Clauses from .docx..."): clauses = get_docx(filename) st.write(" ") st.subheader("Upload is complete. 
The system has identified {} clauses.".format(len(clauses))) if st.checkbox("(Optional) Select to show all clauses extracted from the EULA"): st.subheader("Clauses from the {} EULA Document".format(name)) index = st.slider('Click the slider and press left and right arrow keys to explore data.',0,len(clauses),1) st.write(HTML_WRAPPER.format(clauses[index]),unsafe_allow_html=True) st.subheader("Next, select checkbox to analyze EULA terms and conditions for acceptability.") if st.checkbox("Run Model"): st.subheader("Model Results") acceptable_eulas,unacceptable_eulas,num_pos = get_predictions(clauses) plt.bar(['Acceptable','Unacceptable'],np.array([num_pos,len(clauses)-num_pos])) plt.ylabel("Number of Clauses") plt.title("Overview of clauses predicted Acceptable/Unacceptable") plt.show() st.pyplot() st.write("The model has identified {} clauses as Acceptable, and {} clauses as Unacceptable.".format(num_pos,len(clauses)-num_pos)) st.write(" ") st.subheader("Explore Acceptable clauses:") index1 = st.slider('Click the slider and press left and right arrow keys to explore data.',0,len(acceptable_eulas),1) label1 = '' if acceptable_eulas[index1][1] == 0: label1 = 'Acceptable' else: label1 = 'Unacceptable' st.subheader("The model has identified this clause as {} with {:.1f} % confidence.".format(label1,acceptable_eulas[index1][2]*100 )) st.write(HTML_WRAPPER.format(acceptable_eulas[index1][0]),unsafe_allow_html=True) st.write(" ") st.subheader("Explore Unacceptable clauses:") index2 = st.slider('Click the slider and press left and right arrow keys to explore data.',0,len(unacceptable_eulas),1) label2 = '' if unacceptable_eulas[index2][1] == 0: label2 = 'Acceptable' else: label2 = 'Unacceptable' st.subheader("The model has identified this clause as {} with {:.1f} % confidence.".format(label2,unacceptable_eulas[index2][2]*100 )) st.write(HTML_WRAPPER.format(unacceptable_eulas[index2][0]),unsafe_allow_html=True) st.subheader("Explore Model Interpretation") if st.checkbox("Select to see why individual clauses were identified as unacceptable."): # st.write(" Here we are leveraging the IntegratedGradients to interpret model predictions and show specific words that have highest attribution to the model output.") # st.write("Integrated gradients is an axiomatic attribution method that assigns an attribution(i.e. factor) score to each word/token in the clause.") # st.write(" To run, select the predicted unacceptable clause and press Interpret.") st.subheader("Explore why clauses were identified as Unacceptable:") index3 = st.slider('Click the slider and press left and right arrow keys to explore unacceptable.',0,len(unacceptable_eulas),1) st.write(HTML_WRAPPER.format(unacceptable_eulas[index3][0]),unsafe_allow_html=True) text = unacceptable_eulas[index3][0] if st.button("Interpret Prediction"): with st.spinner("Running Model Interpretation Analysis..."): interpret_main(text,"?")
if times == 0: break #---- Apresentando a seleção select = (dados.consumo == 1) colunas = ['nome', 'preco', 'pais', 'tipo', 'descricao', 'uvas', 'rating'] df_temp = dados[select][colunas].sort_values(by=['uvas'], ascending=False) #--- Precisa verificar se existe algo a apresentar !! tam_temp = df_temp.shape[0] if tam_temp == 0: st.write( 'Lamentamos informar que sua seleção não retornou resultados. Tente novamente!' ) st.stop() # else: # st.write('Sua Seleção de Consumo é apresentada abaixo:') # st.table(df_temp) ############################################################################################################# ## Função de Recomendação, baseada no conceito de Vector Space Model, onde um documento de ## texto é transformado em uma representação vetorial, em um espaço multi dimensional. ## Após a transformação, passamos a empregar a Medida de Similaridade de Coseno, afim de determinar ## quais Observações mais se aproximam do conjunto de Preferencias do usuário, dentro do catalogo. ############################################################################################################# # TfidfVectorizer não possui um conjunto interno de stop words em Portugues, mas aceita uma lista # de palavras como parametro. Dessa forma, vamos gerar as stopwords em Portugues, usando o NLTK. # Armazena as stop words em Portugues em uma variavel
def url_data(): about() st.info("This feature has limited functionality") url=st.text_input("Webpage URL",help="Enter a url where your data is placed") if url=="": st.info("Please enter a valid input to get started") st.stop() #getting data Column names as user input column_name=st.text_input("enter candidadte column Name",key="value") value_list=column_name.split(",") #getting data example for refferances candidate=st.text_input("Candidate example value",key="candidates",help="use ; as seperator to enter another value") items_list=candidate.split(";") #st.write(items) # create object scraper = AutoScraper() # feeding for scraping final_result = scraper.build(url,items_list) # display result results=scraper.get_result_similar(url,grouped=True,keep_order=True) result={} for key,value in results.items(): if value not in result.values(): result[key]=value orient_df=pd.DataFrame.from_dict(result,orient="index") df=orient_df.transpose() df.columns=value_list df.fillna(value=pd.np.nan,inplace=True) st.write(df) cols=df.columns.tolist() col1,col2=st.beta_columns(2) target=col1.selectbox("Select Target", cols,key="target") typelist=['binary','multiclass','regression','time series regression','time series multiclass','time series binary'] p_type=col2.selectbox("Select problem type",typelist,key="p_type") st.write("hey") x=df.drop(columns=target) y=df[target] x_train,x_test,y_train,y_test=evalml.preprocessing.split_data(x,y,problem_type=p_type) automl = AutoMLSearch(X_train=x_train, y_train=y_train, problem_type=p_type) automl.search() rank=automl.rankings #checking best pipeline ############################################################### best_pipeline=automl.best_pipeline description=automl.describe_pipeline(automl.rankings.iloc[0]["id"]) ### OPtimize the code ### Evaluate on hold out data problem_list=['binary','time series binary'] problem_list2=['multiclass','time series multiclass'] cola,col_b,colc=st.beta_columns(3) if p_type in problem_list: objective=col_b.selectbox("select objective",objectives().binary_obj,key="objective selector") best_pipeline.score(x_test, y_test, objectives=["auc","f1","Precision","Recall"]) automl_tunned = AutoMLSearch(X_train=x_train, y_train=y_train, problem_type=p_type, objective=objective, additional_objectives=['f1', 'precision'], max_batches=1, optimize_thresholds=True) automl_tunned.search() tunned_rankings=automl_tunned.rankings tunned_description=automl_tunned.describe_pipeline(automl_tunned.rankings.iloc[0]["id"],return_dict=True) tunned_pipeline= automl_tunned.best_pipeline tunned_pipeline.score(x_test, y_test, objectives=[objective]) pred=tunned_pipeline.predict_proba(x_test).to_dataframe() # for multiclass type problem elif p_type in problem_list2: objective=col_b.selectbox("select objective",objectives().multiclass_obj,key="objective selector") best_pipeline.score(x_test, y_test, objectives=["log loss multiclass","MCC multiclass","accuracy multiclass"]) automl_tunned = AutoMLSearch(X_train=x_train, y_train=y_train, problem_type=p_type, objective=objective, additional_objectives=['MCC multiclass', 'accuracy multiclass'], max_batches=1, optimize_thresholds=True) automl_tunned.search() tunned_rankings=automl_tunned.rankings tunned_description=automl_tunned.describe_pipeline(automl_tunned.rankings.iloc[0]["id"],return_dict=True) tunned_pipeline= automl_tunned.best_pipeline tunned_pipeline.score(x_test, y_test, objectives=[objective]) pred=tunned_pipeline.predict(x_test).to_series() # for regression type problems else: objective=col_b.selectbox("select 
objective",objectives().regression_obj,key="objective selector") best_pipeline.score(x_test, y_test, objectives=["r2","MSE","MAE","Root Mean Squared Error"]) automl_tunned = AutoMLSearch(X_train=x_train, y_train=y_train, problem_type=p_type, objective=objective, additional_objectives=['Root Mean Squared Error', 'MSE','MAE'], max_batches=1, optimize_thresholds=True) automl_tunned.search() tunned_rankings=automl_tunned.rankings tunned_description=automl_tunned.describe_pipeline(automl_tunned.rankings.iloc[0]["id"],return_dict=True) tunned_pipeline= automl_tunned.best_pipeline tunned_pipeline.score(x_test, y_test, objectives=[objective]) tunned_pipeline.fit(x_train,y_train) pred=tunned_pipeline.predict(x_test).to_series() file=open("model_details.txt","w") str_dict=repr(tunned_description) file.write(str_dict) file.close() def get_binary_file_downloader_html(bin_file, file_label='File'): with open(bin_file, 'rb') as f: data = f.read() bin_str = base64.b64encode(data).decode() href = f'<a href="data:application/octet-stream;base64,{bin_str}" download="{os.path.basename(bin_file)}">Click Here To Download {file_label}</a>' return href col1,col2,col3=st.beta_columns([1,1,1]) if col2.button("Predict Results",key="output",help="shows results"): st.spinner() with st.spinner(text='In progress'): st.info("Wait while we are selecting a best algoritham for your problem..Hold your breath.") time.sleep(20) st.info("Done. Here you go.") st.write(pred) col11,col12=st.beta_columns([3,1]) with col11: with st.beta_expander("Compare Models"): st.write(tunned_rankings) with col12: with st.beta_expander("Best Pipeline"): st.success(tunned_pipeline) st.markdown(get_binary_file_downloader_html('model_details.txt', 'Pipeline Details'), unsafe_allow_html=True)