def balance_chart(input_ticker, balance_df):
    """Render two balance-sheet charts for a ticker on the Streamlit page.

    Chart 1 stacks ``totalLiabilities`` and ``totalShareholderEquity`` as bars
    on the primary (dollar) axis and overlays three ratio lines on the
    secondary (percent) axis. Chart 2 draws the 'Cash/Assets' series as bars
    and the 'intangible/Assets' series as a line, each on its own percent
    axis.

    Relies on the module-level names ``st``, ``go``, ``make_subplots``,
    ``marker_colors`` and ``template``.

    Args:
        input_ticker: Ticker symbol (str); it is concatenated into both chart
            titles.
        balance_df: DataFrame whose index is used as the x axis and which
            holds every column named in the lists below.

    Returns:
        None. Both figures are emitted with ``st.plotly_chart``.
    """
    x_data = balance_df.index

    # ---- Chart 1: debt ratio, current ratio, quick ratio ----
    st.subheader('Asset, Liabilities, ShareholderEquity')
    title = '(' + input_ticker + ') <b>Asset & Liabilities</b>'
    # The font size lives in title.font; the legacy ``titlefont_size`` layout
    # shortcut is deprecated in Plotly.
    titles = dict(text=title, x=0.5, y=0.85, font=dict(size=15))
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    y_data_bar3 = ['totalLiabilities', 'totalShareholderEquity']
    y_data_line3 = ['Debt/Equity', 'QuickRatio', '유동부채/자기자본']

    for y_data, color in zip(y_data_bar3, marker_colors):
        fig.add_trace(
            go.Bar(name=y_data,
                   x=x_data,
                   y=balance_df[y_data],
                   text=balance_df[y_data],
                   textposition='outside',
                   marker_color=color),
            secondary_y=False)
    for y_data, color in zip(y_data_line3, marker_colors):
        fig.add_trace(
            go.Scatter(mode='lines+markers+text',
                       name=y_data,
                       x=x_data,
                       y=balance_df[y_data],
                       text=balance_df[y_data],
                       textposition='top center',
                       marker_color=color),
            secondary_y=True)
    fig.update_traces(texttemplate='%{text:.3s}')

    # Series.max() skips NaN; the builtin max() gives order-dependent results
    # as soon as a NaN is present.
    bar_peak = balance_df[y_data_bar3[0]].max()
    line_peak = balance_df[y_data_line3[0]].max()
    # Leave headroom above the bars for the 'outside' text labels.
    fig.update_yaxes(range=[0, bar_peak * 2], secondary_y=False)
    # The negative lower bound lifts the ratio lines above the bars.
    fig.update_yaxes(range=[-line_peak, line_peak * 1.2], secondary_y=True)
    fig.update_yaxes(title_text="Liabilities Rate",
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     zerolinecolor='LightPink',
                     ticksuffix="%",
                     secondary_y=True)
    fig.update_yaxes(title_text="Asset",
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True,
                     tickprefix="$",
                     secondary_y=False)
    fig.update_layout(title=titles,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(barmode='stack')
    # The "myID" template replaces the one applied just above.
    fig.update_layout(template="myID")
    st.plotly_chart(fig)

    # ---- Chart 2: intangible-asset ratio and cash-asset ratio ----
    title = ('(' + input_ticker +
             ') <b>IntangibleAssets & Cash And ShortTermInvestments</b>')
    titles = dict(text=title, x=0.5, y=0.85, font=dict(size=15))
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    y_data_bar4 = ['무형자산비율', '현금성자산비율']
    y_data_bar4_name = ['intangible/Assets', 'Cash/Assets']

    fig.add_trace(
        go.Bar(name=y_data_bar4_name[1],
               x=x_data,
               y=balance_df[y_data_bar4[1]],
               text=balance_df[y_data_bar4[1]],
               textposition='outside',
               marker_color=marker_colors[0]),
        secondary_y=False)
    fig.add_trace(
        go.Scatter(mode='lines+markers+text',
                   name=y_data_bar4_name[0],
                   x=x_data,
                   y=balance_df[y_data_bar4[0]],
                   text=balance_df[y_data_bar4[0]],
                   textposition='top center',
                   marker_color=marker_colors[2]),
        secondary_y=True)
    fig.update_traces(texttemplate='%{text:.3s}')
    fig.update_yaxes(title_text="Cash/Assets",
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     ticksuffix="%",
                     secondary_y=False)
    fig.update_yaxes(title_text="intangible/Assets",
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True,
                     ticksuffix="%",
                     secondary_y=True)
    fig.update_layout(title=titles,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(template="myID")
    st.plotly_chart(fig)
def _grade_label(score, labels):
    """Return the label for a final score; ``labels`` runs from worst to best.

    Integer scores 0-9 map to ``labels[0]``, 10-11 to ``labels[1]``, 12-13 to
    ``labels[2]``, 14-15 to ``labels[3]`` and anything else to ``labels[4]``.
    """
    bands = ((0, 10), (10, 12), (12, 14), (14, 16))
    for (lower, upper), label in zip(bands, labels):
        if lower <= score < upper:
            return label
    return labels[4]


def app():
    """Streamlit page: upload a dataset, explore it, then train a classifier.

    The page has three parts:

    1. "Upload" - read a ';'-separated CSV (falling back to Excel) and
       optionally display it.
    2. "Let's Visualise" - draw one of several chart types from the numeric
       columns. The line chart is only offered for 'student-por.csv' and
       'student-mat.csv', which are expected to have a 'G3' column.
    3. "Pre-processing, Spliting, Training" - encode the student columns,
       split the data and train an LVQ or PNN classifier.

    Relies on module-level names (``st``, ``pd``, ``np``, ``px``, ``plt``,
    ``sns``, ``time``, ``train_test_split``, ``algorithms``, ``metrics``,
    ``cohen_kappa_score``). Takes no arguments and returns None.
    """
    student_files = ('student-por.csv', 'student-mat.csv')

    def upload_data(uploaded_file):
        """Read a ';'-separated CSV; return it with numeric and text columns."""
        df = pd.read_csv(uploaded_file, sep=';')
        numeric_cols = list(df.select_dtypes(['float64', 'int64']).columns)
        text_cols = df.select_dtypes(['object']).columns
        return df, numeric_cols, text_cols

    def report_scores(model_name, y_train, y_train_pred, y_test, y_test_pred):
        """Show train/test accuracy and Cohen's kappa for a fitted model."""
        time.sleep(0.1)
        balloons = st.balloons()
        st.markdown(f'Prediction accuracy of {model_name} Train data : ',
                    unsafe_allow_html=True)
        st.write('{:.2%}\n'.format(
            metrics.accuracy_score(y_train, y_train_pred)))
        st.markdown(f'Prediction accuracy of {model_name} Test data : ',
                    unsafe_allow_html=True)
        st.write('{:.2%}\n'.format(
            metrics.accuracy_score(y_test, y_test_pred)))
        cohen_score = cohen_kappa_score(y_test, y_test_pred)
        st.markdown(f'{model_name} Cohen-Kappa Score :',
                    unsafe_allow_html=True)
        st.write(cohen_score)
        time.sleep(1)
        balloons.empty()

    st.subheader('Visualization')
    st.info(
        'Exploring the world of Machine Learning and Artificial Intelligence with the magic of data'
    )

    # ------------------------------------------------------------------
    # Upload
    # ------------------------------------------------------------------
    df = None
    numeric_cols = []
    with st.beta_expander("Upload"):
        col1, col2 = st.beta_columns(2)
        with col1:
            uploaded_file = st.file_uploader(label="Upload your csv file:",
                                             type=['csv', 'xlsx'])
            if uploaded_file is not None:
                try:
                    df, numeric_cols, _ = upload_data(uploaded_file)
                except Exception:
                    # Not readable as CSV, so retry as Excel. The failed
                    # attempt may have consumed the stream; rewind first.
                    uploaded_file.seek(0)
                    df = pd.read_excel(uploaded_file)
                    numeric_cols = list(
                        df.select_dtypes(['float', 'int']).columns)

        if uploaded_file is None:
            st.error("Please Upload a File")
        elif st.button('View Data'):
            try:
                # Purely cosmetic progress counter.
                latest_iteration = st.empty()
                for i in range(100):
                    latest_iteration.info(f' {i + 1} %')
                    time.sleep(0.05)
                time.sleep(0.2)
                latest_iteration.empty()
                st.info(uploaded_file.name)
                st.write(df)
                x_val = df.shape[0]
                y_val = df.shape[1]
                st.write("Data-shape :", x_val, "Features :", y_val)
            except Exception as e:
                # Surface the problem instead of swallowing it silently.
                st.error(f"Could not display the data: {e}")

    # ------------------------------------------------------------------
    # Visualisation
    # ------------------------------------------------------------------
    with st.beta_expander("Let's Visualise"):
        col3, col4 = st.beta_columns((1, 3))
        if uploaded_file is not None:
            with col3:
                chart_select = st.selectbox(label="Select the chart-type",
                                            options=[
                                                'Scatter-plots', 'Histogram',
                                                'Distplot', 'Box-plot',
                                                'Violin-plot', 'Line-chart',
                                                'Heat-map'
                                            ])

                if chart_select == 'Scatter-plots':
                    st.subheader("Scatter-plot Settings:")
                    x_values = st.selectbox('X-axis', options=numeric_cols)
                    y_values = st.selectbox('Y-axis', options=numeric_cols)
                    with col4:
                        plot = px.scatter(data_frame=df,
                                          x=x_values,
                                          y=y_values)
                        st.plotly_chart(plot)

                if chart_select == 'Histogram':
                    st.subheader("Histogram Settings:")
                    x_values = st.selectbox('value', options=numeric_cols)
                    x_val = np.array(df[x_values])
                    fig, ax = plt.subplots(figsize=(15, 9))
                    sns.set_style("darkgrid")
                    sns.histplot(data=x_val, kde=True)
                    with col4:
                        st.pyplot(fig)

                if chart_select == 'Distplot':
                    st.subheader("Distplot Settings:")
                    x_values = st.selectbox('value', options=numeric_cols)
                    x_val = np.array(df[x_values])
                    fig, ax = plt.subplots(figsize=(15, 9))
                    sns.set_style("darkgrid")
                    sns.distplot(x_val)
                    with col4:
                        st.pyplot(fig)

                if chart_select == 'Box-plot':
                    st.subheader("Box-plot Settings:")
                    x_values = st.selectbox('X-axis', options=numeric_cols)
                    y_values = st.selectbox('Y-axis', options=numeric_cols)
                    with col4:
                        plot = px.box(data_frame=df, x=x_values, y=y_values)
                        st.plotly_chart(plot)

                if chart_select == 'Violin-plot':
                    st.subheader("Violin-plot Settings:")
                    x_values = st.selectbox('X-axis', options=numeric_cols)
                    y_values = st.selectbox('Y-axis', options=numeric_cols)
                    with col4:
                        plot = px.violin(data_frame=df,
                                         x=x_values,
                                         y=y_values,
                                         points='all',
                                         box=True)
                        st.plotly_chart(plot)

                if chart_select == 'Heat-map':
                    st.subheader('Heat-map')
                    # Correlate numeric columns only; recent pandas raises on
                    # text columns instead of dropping them.
                    data_val = df.select_dtypes(include='number')
                    fig, ax = plt.subplots(figsize=(15, 9))
                    sns.set_theme(style='darkgrid', palette='deep')
                    sns.heatmap(data_val.corr(),
                                ax=ax,
                                annot=True,
                                fmt='.3f',
                                linewidths=.9,
                                cbar_kws={"orientation": "horizontal"},
                                cmap='BuPu')
                    with col4:
                        st.pyplot(fig)

                if chart_select == 'Line-chart':
                    st.subheader("Line-3d-chart Settings:")
                    if uploaded_file.name in student_files:
                        error_entry = st.success("Grade-column created!!")
                        time.sleep(0.1)
                        error_entry.empty()
                        # Adds the letter grade to the uploaded frame itself.
                        df['Grade'] = [
                            _grade_label(score, ('F', 'D', 'C', 'B', 'A'))
                            for score in df['G3'].values
                        ]
                        ncols = list(
                            df.select_dtypes(['float64', 'int64']).columns)
                        feature_selection = st.multiselect(
                            label="Features to plot",
                            options=ncols,
                            default=ncols[0])
                        # Offer exactly the grades produced above; the list
                        # used to contain "E" (never assigned) and no "F".
                        feature_ticker = st.selectbox(
                            'Feature ticker',
                            options=["A", "B", "C", "D", "F"])
                        if feature_selection:
                            df_features = df.loc[
                                df['Grade'] == feature_ticker,
                                feature_selection]
                            with col4:
                                plot = px.line(data_frame=df_features,
                                               x=df_features.index,
                                               y=feature_selection)
                                st.plotly_chart(plot)
                        else:
                            st.error("Please select one Feature-selection")
                    else:
                        st.info("Line charts are only available for "
                                "'student-por.csv' and 'student-mat.csv'.")
        else:
            st.error("Please upload file in 'Upload' section")

    # ------------------------------------------------------------------
    # Pre-processing, splitting, training
    # ------------------------------------------------------------------
    st.subheader("Pre-processing, Spliting, Training")
    col6, col7, col8 = st.beta_columns((1, 1, 1))
    col9, col10 = st.beta_columns((6, 1))

    if uploaded_file is None:
        st.error("Please upload a file in 'Upload' section.")
        return

    # Each step needs the previous one; None means "not done in this run".
    d1 = X = Y = None
    xTrain = xTest = yTrain = yTest = None

    with col6:
        pg = st.beta_expander("Preprocessing")
        with pg:
            ppd = st.checkbox(label="Preprocess-data")
            if ppd:
                dataset = df
                yes_no = {'yes': 1, 'no': 0}
                job = {
                    'teacher': 1,
                    'health': 2,
                    'services': 3,
                    'at_home': 4,
                    'other': 5,
                }
                # Values missing from a mapping are left untouched.
                replacements = {
                    'address': {'U': 1, 'R': 2},
                    'famsize': {'LE3': 1, 'GT3': 2},
                    'Pstatus': {'T': 1, 'A': 2},
                    'sex': {'M': 1, 'F': 2},
                }
                # Values missing from a mapping become NaN (Series.map).
                lookups = {
                    'romantic': yes_no,
                    'internet': yes_no,
                    'famsup': yes_no,
                    'schoolsup': yes_no,
                    'Mjob': job,
                    'Fjob': job,
                    'activities': yes_no,
                    'paid': yes_no,
                    'nursery': yes_no,
                    'higher': yes_no,
                    'reason': {
                        'home': 1,
                        'reputation': 2,
                        'course': 3,
                        'other': 4,
                    },
                    'guardian': {'mother': 1, 'father': 2, 'other': 3},
                    'school': {'GP': 1, 'MS': 2},
                }
                # Assign the result back: ``df[col].replace(..., inplace=True)``
                # acts on a temporary and is a no-op under Copy-on-Write.
                for column, mapping in replacements.items():
                    dataset[column] = dataset[column].replace(mapping)
                for column, mapping in lookups.items():
                    dataset[column] = dataset[column].map(mapping)

                # Numeric grade: 4 is the lowest score band, 0 the highest.
                dataset['Grade'] = [
                    _grade_label(score, (4, 3, 2, 1, 0))
                    for score in dataset['G3'].values
                ]
                dataset.drop(dataset[dataset.G1 == 0].index, inplace=True)
                dataset.drop(dataset[dataset.G3 == 0].index, inplace=True)

                d1 = dataset
                d1['All_Sup'] = d1['famsup'] & d1['schoolsup']
                d1['maxparent_edu'] = d1[['Medu', 'Fedu']].max(axis=1)
                d1['more_high'] = d1['higher'] & (d1['schoolsup']
                                                  | d1['paid'])
                d1['All_alc'] = d1['Walc'] + d1['Dalc']
                d1['Dalc_per_week'] = d1['Dalc'] / d1['All_alc']
                d1['studytime_ratio'] = d1['studytime'] / (d1[[
                    'studytime', 'traveltime', 'freetime'
                ]].sum(axis=1))
                d1.drop(['Dalc', 'Walc', 'studytime', 'Fedu', 'Medu'],
                        axis=1,
                        inplace=True)

                # Features and target are picked by position.
                # NOTE(review): presumably column 28 is 'Grade' for the
                # standard student CSV layout - confirm against the data.
                X = d1.iloc[:, [1, 2, *range(6, 28), *range(29, 35)]]
                Y = d1.iloc[:, [28]]
                time.sleep(0.01)
                dp = st.success("Data-Preprocessed")
                time.sleep(1)
                dp.empty()

    with col7:
        sg = st.beta_expander("Splitting")
        with sg:
            sd = st.checkbox(label="Splitting Training Data")
            if sd:
                if X is None:
                    st.warning("Please preprocess the data first.")
                else:
                    test_size = st.number_input('Test-size', value=0.3)
                    random_state = st.number_input('Random-state', value=42)
                    xTrain, xTest, yTrain, yTest = train_test_split(
                        X, Y, test_size=test_size, random_state=random_state)

    with col8:
        tdd = st.beta_expander("Train")
        with tdd:
            classifier_name = st.selectbox("Select Classifier :",
                                           ("LVQ", "PNN"))

            if classifier_name == "LVQ":
                if st.checkbox(label="LVQ Classifier-Settings"):
                    if d1 is None:
                        st.warning("Please preprocess the data first.")
                    else:
                        n_inp1 = st.selectbox('Features-inputs',
                                              range(d1.shape[1]))
                        n_cla1 = st.number_input('Classes', 0)
                        step1 = st.number_input('Step', 0.01)
                        with col9:
                            if st.button("Train"):
                                if xTrain is None:
                                    st.warning(
                                        "Please split the data first.")
                                else:
                                    Lvq_net = algorithms.LVQ21(
                                        n_inputs=n_inp1,
                                        n_classes=n_cla1,
                                        verbose=False,
                                        step=step1,
                                        shuffle_data=False)
                                    Lvq_net.train(xTrain, yTrain, epochs=100)
                                    report_scores("LVQ", yTrain,
                                                  Lvq_net.predict(xTrain),
                                                  yTest,
                                                  Lvq_net.predict(xTest))

            if classifier_name == "PNN":
                if st.checkbox(label="PNN Classifier-Settings"):
                    std_dev = st.number_input("Standard-deviation", 5)
                    with col9:
                        if st.button("Train"):
                            if xTrain is None:
                                st.warning("Please split the data first.")
                            else:
                                pnn = algorithms.PNN(std=std_dev,
                                                     verbose=False)
                                pnn.train(xTrain, yTrain)
                                report_scores("PNN", yTrain,
                                              pnn.predict(xTrain), yTest,
                                              pnn.predict(xTest))
y='DECODER.acc', facet_col=sort, hover_data=[ 'TAPE.stability.S_Corr', 'TAPE.fluorescence.S_Corr', 'DMS.protein_g.P_corr', 'DMS.1D5R.P_corr', 'DMS.2H11.P_corr' ], color='MODEL', labels={ 'DECODER.acc': 'Accuracy of decoder', 'MODEL_PARAMS.channels': 'chnls:' }, barmode="group") fig.update_layout(title='Decoder Accuracy', width=900, height=700) st.plotly_chart(fig) if st.checkbox("Get explanation of model names?"): st.markdown(expl_model_names()) #------------------------------------------------------------- # DOWNSTREAM MODEL COMPARISONS #------------------------------------------------------------- if display_mdl_compare: st.header("Downstream Model Performances ") # plotting options st.markdown('Options:') # add option to select models to plot options_models = ['all'] + [i for i in list(df['models'])]
# ay=-30, bordercolor="#c7c7c7", borderwidth=2, borderpad=4, bgcolor="#ff7f0e", opacity=0.7) fig.update_shapes(dict(xref='x', yref='y')) config = dict({ "modeBarButtonsToRemove": ['autoScale2d', 'toggleSpikelines'], }) fig.update_layout(autosize=False, width=900, height=550, margin=dict(l=20, r=40, b=40, t=70)) st.plotly_chart(fig, config=config) stem_df = update_stem( session_state) #, taps_per_phase, gen_2X, max_fft, dsp_type) try: # ipdb.set_trace() num_rows = len(stem_df) if num_rows > 1024: div = num_rows // 1024 stem_df = stem_df.iloc[::div, :] fig = plotly_time_helper(stem_df, opacity=[.8] * 2, index_str='sig_idx', y_name='Taps', stem_plot=False,
# New York Presbyterian: select this hospital's rows from each dataset.
# The outpatient comparison uses title case while the other two use upper
# case, exactly as in the original filters.
hospital_nyp = df_hospital_2[
    df_hospital_2['hospital_name'] == 'NEW YORK-PRESBYTERIAN HOSPITAL']
inpatient_nyp = df_inpatient_2[
    df_inpatient_2['provider_name'] == 'NEW YORK-PRESBYTERIAN HOSPITAL']
outpatient_nyp = df_outpatient_2[
    df_outpatient_2['provider_name'] == 'New York-Presbyterian Hospital']

# Bar Chart
st.subheader('**Hospital Types Within NY**')
# Name both output columns explicitly. A bare value_counts().reset_index()
# yields ('index', 'hospital_type') on pandas 1.x but
# ('hospital_type', 'count') on pandas 2.x, which breaks the pie chart below.
bar1 = (hospitals_ny['hospital_type']
        .value_counts()
        .rename_axis('index')
        .reset_index(name='hospital_type'))
st.dataframe(bar1)
st.markdown('This chart shows the number of hospitals within NY that identify as acute care, psychiatric, critical access, acute care - department of defense, and childrens. The majority of hospitals identify as acute care with 144 hospitals, followed by psychiatric with 27 hospitals, critical access with 18 hospitals, acute care - department of defense with 1 hospital, and childrens with 1 hospital.')

# Pie Chart: 'index' holds the hospital type, 'hospital_type' its count.
st.subheader('**Types of Hospitals Within NY**')
fig = px.pie(bar1, values='hospital_type', names='index')
st.plotly_chart(fig)
st.markdown('This pie chart visually displays the same data presented in the previous bar chart. The largest percentage of NY hospitals are classified as acute care with 75.4%.')

# Map: split each 'location' string on spaces into three parts and name the
# second and third 'lon' and 'lat', the column names st.map looks for.
st.subheader('**NY Hospital Locations**')
hospitals_ny_gps = (hospitals_ny['location']
                    .str.strip('()')
                    .str.split(' ', expand=True)
                    .rename(columns={0: 'Point', 1: 'lon', 2: 'lat'}))
# The opening parenthesis ends up in the middle part after the split.
hospitals_ny_gps['lon'] = hospitals_ny_gps['lon'].str.strip('(')
hospitals_ny_gps = hospitals_ny_gps.dropna()
hospitals_ny_gps['lon'] = pd.to_numeric(hospitals_ny_gps['lon'])
hospitals_ny_gps['lat'] = pd.to_numeric(hospitals_ny_gps['lat'])
st.map(hospitals_ny_gps)
st.markdown('This is an interactive map that displays the locations for the NY hospital sites found within this dataset.')
def show_graph2(df, n, cu):
    """Plot the last ``n`` rows of ``df`` as a line chart with markers.

    Args:
        df: DataFrame with a 'date' column, used as the x axis.
        n: Number of trailing rows to plot.
        cu: Column name (or names) plotted on the y axis.

    Returns:
        None. The figure is rendered with ``st.plotly_chart``.
    """
    recent_rows = df[-n:]
    figure = px.line(data_frame=recent_rows, x='date', y=cu)
    figure.update_traces(mode="markers+lines")
    st.plotly_chart(figure)
def main():
    """Run the ancestry-visualisation Streamlit app.

    Loads the 1000 Genomes reference samples for the chosen set of
    ancestry-informative SNPs, encodes them and reduces their dimensionality.
    If the user uploads a genotype file that parses, the sample is projected
    into the same space, plotted with the reference and classified with a
    k-nearest-neighbours model. Otherwise only the reference samples are
    plotted.

    Takes no arguments and returns None; all output goes to the page.
    """
    # Render the readme as markdown using st.markdown.
    st.markdown(get_file_content_as_string("intro.md"))

    # get the 1000 genomes samples
    dfsamples = get_1kg_samples_app()

    # Once we have the dependencies, add a selector for the app mode on the
    # sidebar.
    st.sidebar.title("Visualization Settings")

    # select which set of SNPs to explore
    aisnp_set = st.sidebar.radio(
        "Set of ancestry-informative SNPs:",
        ("kidd et al. 55 aisnps", "seldin et al. 128 aisnps"),
    )
    if aisnp_set == "kidd et al. 55 aisnps":
        aisnps_1kg = vcf2df_app("data/aisnps/kidd.aisnp.1kg.vcf", dfsamples)
        n_aisnps = 55
    elif aisnp_set == "seldin et al. 128 aisnps":
        aisnps_1kg = vcf2df_app("data/aisnps/seldin.aisnp.1kg.vcf", dfsamples)
        n_aisnps = 128

    # Encode 1kg data
    X_encoded, encoder = encode_genotypes_app(aisnps_1kg)

    # Dimensionality reduction
    dimensionality_reduction_method = st.sidebar.radio(
        "Dimensionality reduction technique:", ("pca", "umap", "t-SNE"))
    # perform dimensionality reduction on the 1kg set
    X_reduced, reducer = dimensionality_reduction_app(
        X_encoded, algorithm=dimensionality_reduction_method)

    # Which population to plot
    population_level = st.sidebar.radio("Population Resolution:",
                                        ("super population", "population"))

    # predicted population
    knn = KNeighborsClassifier(n_neighbors=9, weights="distance", n_jobs=2)

    # upload the user genotypes file
    user_file = st.sidebar.file_uploader("Upload your genotypes:")

    # ``userdf`` stays None when nothing was uploaded or parsing failed, so
    # the user-specific steps below are skipped rather than crashing on an
    # undefined name.
    userdf = None
    if user_file is not None:
        try:
            with st.spinner("Uploading your genotypes..."):
                userdf = SNPs(user_file.getvalue()).snps
        except Exception as e:
            st.error(
                f"Sorry, there was a problem processing your genotypes file.\n {e}"
            )

    if userdf is not None:
        # filter and encode the user record
        user_record, aisnps_1kg = filter_user_genotypes_app(userdf, aisnps_1kg)
        user_n_missing = (user_record.drop(
            columns=["super population", "population", "gender"]).isnull().sum(
                axis=1)["your_sample"])
        user_encoded = encoder.transform(user_record)
        X_encoded = np.concatenate((X_encoded, user_encoded))
        del userdf

        # impute the user record and reduce the dimensions
        user_imputed = impute_missing(X_encoded)
        user_reduced = reducer.transform([user_imputed])

        # fit the knn before adding the user sample
        knn.fit(X_reduced, dfsamples[population_level])

        # concat the 1kg and user reduced arrays
        X_reduced = np.concatenate((X_reduced, user_reduced))
        dfsamples.loc["me"] = ["me"] * 3

        # plot (the keyword is ``use_container_width``; the previous
        # ``user_container_width`` spelling was a typo)
        plotly_3d = plot_3d(X_reduced, dfsamples, population_level)
        st.plotly_chart(plotly_3d, use_container_width=True)

        # missingness
        st.subheader("Missing AIsnps")
        st.text(
            f"Your file upload was missing {user_n_missing} ({round((user_n_missing / n_aisnps) * 100, 1)}%) of the {n_aisnps} total AIsnps.\nThese locations were imputed during prediction."
        )

        # predict the population for the user sample
        user_pop = knn.predict(user_reduced)[0]
        st.subheader(f"Your predicted {population_level}")
        st.text(
            f"Your predicted population using knn classifier is {user_pop}")

        # show the predicted probabilities for each population
        st.subheader(f"Your predicted {population_level} probabilities")
        user_pop_probs = knn.predict_proba(user_reduced)
        user_probs_df = pd.DataFrame([user_pop_probs[0]],
                                     columns=knn.classes_,
                                     index=["me"])
        st.dataframe(user_probs_df)

        show_user_gts = st.sidebar.checkbox("Show Your Genotypes")
        if show_user_gts:
            user_table_title = "Genotypes of Ancestry-Informative SNPs in Your Sample"
            st.subheader(user_table_title)
            st.dataframe(user_record)
    else:
        # plot the reference samples only
        plotly_3d = plot_3d(X_reduced, dfsamples, population_level)
        st.plotly_chart(plotly_3d, use_container_width=True)

    # Collapsable 1000 Genomes sample table
    show_1kg = st.sidebar.checkbox("Show 1k Genomes Genotypes")
    if show_1kg is True:
        table_title = (
            "Genotypes of Ancestry-Informative SNPs in 1000 Genomes Project Samples"
        )
        with st.spinner("Loading 1k Genomes DataFrame"):
            st.subheader(table_title)
            st.dataframe(aisnps_1kg)

    # Render the details page as markdown using st.markdown.
    st.markdown(get_file_content_as_string("details.md"))
def _nbp_csv_download_link(df, filename):
    """Return an HTML anchor that downloads ``df`` as a CSV (index omitted)."""
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # str -> bytes -> base64
    return (f'<a href="data:file/csv;base64,{b64}" download={filename}>'
            f'Download {filename} File</a>')


def _nbp_figure_download_link(plt, filename):
    """Save the current pyplot figure as PDF, close it, return a link."""
    buffer = io.BytesIO()
    plt.savefig(buffer, format='pdf', bbox_inches='tight')
    plt.close()
    b64 = base64.b64encode(buffer.getvalue()).decode()
    # The payload is a PDF, so declare it as one (it used to say image/png).
    return (f'<a href="data:application/pdf;base64,{b64}" '
            f'download={filename}>Download {filename} File</a>')


def _nbp_metric_bars(scores, metric, heading, file_stem, tall_size, wide_size,
                     unit_axis=False):
    """Draw tall and wide bar plots of one metric, each with a PDF link.

    Only the wide plot is shown on the page; the tall one is offered as a
    download. ``unit_axis`` pins the metric axis to 0..1.
    """
    with st.markdown(heading):
        # Tall
        plt.figure(figsize=tall_size)
        sns.set_theme(style="darkgrid")
        ax = sns.barplot(y=scores.index, x=metric, data=scores)
        if unit_axis:
            ax.set(xlim=(0, 1))
        st.markdown(_nbp_figure_download_link(plt, f'{file_stem}-tall.pdf'),
                    unsafe_allow_html=True)
        # Wide
        plt.figure(figsize=wide_size)
        sns.set_theme(style="darkgrid")
        ax = sns.barplot(x=scores.index, y=metric, data=scores)
        if unit_axis:
            ax.set(ylim=(0, 1))
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(_nbp_figure_download_link(plt, f'{file_stem}-wide.pdf'),
                    unsafe_allow_html=True)


def _nbp_regression_plots(scores, n_rows):
    """Plot R-squared, RMSE and fit time for the regression score table."""
    scores["R-Squared"] = scores["R-Squared"].clip(lower=0)
    _nbp_metric_bars(scores, "R-Squared", '**R-squared**', 'plot-r2',
                     (3, 9), (12, 3), unit_axis=True)
    # Cap RMSE at half the row count so outliers do not flatten the plot.
    scores["RMSE"] = scores["RMSE"].clip(upper=n_rows / 2)
    _nbp_metric_bars(scores, "RMSE", '**RMSE (capped at l/2)**', 'plot-rmse',
                     (3, 9), (12, 3))
    scores["Time Taken"] = scores["Time Taken"].clip(lower=0)
    _nbp_metric_bars(scores, "Time Taken", '**Calculation time**',
                     'plot-calculation-time', (3, 9), (9, 3))


def _nbp_classification_plots(scores, n_rows):
    """Plot accuracy for the classification score table."""
    scores["Accuracy"] = scores["Accuracy"].clip(lower=0)
    # The 'plot-r2' file names are kept from the original download links.
    _nbp_metric_bars(scores, "Accuracy", '**Accuracy**', 'plot-r2',
                     (5, 12), (12, 5), unit_axis=True)


def _nbp_build_model(df, estimator_cls, plot_scores, split_size, seed_number):
    """Fit every lazy model on ``df`` and render score tables and plots.

    The last column of ``df`` is the target and all others are features.
    ``split_size`` is passed to ``train_test_split`` as ``test_size``.
    """
    n_rows = len(df)
    X = df.iloc[:, :-1]  # every column except the last
    Y = df.iloc[:, -1]  # the last column
    st.markdown('**1.2. Dataset dimension**')
    st.write('X (Independent Axis)')
    st.info(X.shape)
    st.write('Y (Dependent Axis)')
    st.info(Y.shape)
    st.markdown('**1.3. Variable details**:')
    st.write('X variable (first few are shown)')
    st.info(list(X.columns[:int(n_rows / 5)]))
    st.write('Y variable')
    st.info(Y.name)

    # Build lazy model
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=split_size, random_state=seed_number)
    estimator = estimator_cls(verbose=0,
                              ignore_warnings=False,
                              custom_metric=None)
    # First fit scores the models on the training data itself, the second
    # on the held-out data.
    _, predictions_train = estimator.fit(X_train, X_train, Y_train, Y_train)
    _, predictions_test = estimator.fit(X_train, X_test, Y_train, Y_test)

    st.subheader('2.Model Performance Plot (Training Set)')
    st.write('Training set')
    st.write(predictions_train)
    st.markdown(_nbp_csv_download_link(predictions_train, 'training.csv'),
                unsafe_allow_html=True)
    st.write('Test set')
    st.write(predictions_test)
    st.markdown(_nbp_csv_download_link(predictions_test, 'test.csv'),
                unsafe_allow_html=True)

    st.subheader('3.Model Performance Plot(Test set)')
    plot_scores(predictions_test, n_rows)


def _nbp_model_section(estimator_cls, plot_scores):
    """Render upload/parameter widgets and run the lazy model builder."""
    with st.sidebar.header('File Uploader Section'):
        uploaded_file = st.sidebar.file_uploader(
            "Upload an input as CSV file", type=["csv"])
    with st.sidebar.header(
            'Set the optimization parameters\n (Grab the slider and set to any suitable point)'
    ):
        # train_test_split rejects 0.0 and 1.0, so keep the ends out of reach.
        split_size = st.sidebar.slider('Data split ratio (in fraction):',
                                       0.01, 0.99, 0.7, 0.01)
        # Arguments are (min, max, default, step). The old call passed
        # (0, 1, 100, 5), i.e. a default of 100 outside the 0..1 range.
        seed_number = st.sidebar.slider('Set the random-seed-value :', 0,
                                        100, 0, 5)
    with st.sidebar.header('Project made by:'):
        st.write("Made by: MAINAK CHAUDHURI")

    st.subheader('Dataset display')
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.markdown('**Snap of the dataset**')
        st.write(df)
        _nbp_build_model(df, estimator_cls, plot_scores, split_size,
                         seed_number)
    else:
        st.info('Upload a file')
        st.info('OR')
        if st.button('Use preloaded data instead'):
            st.info("Dataset used : Pima diabetes")
            # NOTE(review): the data comes from ``load_diabetes``; confirm
            # it matches the label above and suits the classification mode.
            diabetes = load_diabetes()
            X = pd.DataFrame(diabetes.data,
                             columns=diabetes.feature_names).loc[:100]
            Y = pd.Series(diabetes.target, name='response').loc[:100]
            df = pd.concat([X, Y], axis=1)
            st.markdown(
                'Displaying results form a sample preloaded data :')
            st.write(df.head(5))
            _nbp_build_model(df, estimator_cls, plot_scores, split_size,
                             seed_number)


def _nbp_eda_section():
    """Render the exploratory-data-analysis page for an uploaded CSV."""
    st.subheader("Explanatory Data Analysis")
    data = st.file_uploader("Please upload a CSV dataset ", type=['csv'])
    st.warning('Your dataset goes here...')
    if data is None:
        return

    df = pd.read_csv(data)
    st.dataframe(df)
    st.info('Some useful data insights about your data')
    if st.checkbox("Display shape"):
        r, c = df.shape
        st.write('Rows = ', r, 'Columns = ', c)
    if st.checkbox('Display columns'):
        st.write(df.columns)
    if st.checkbox('Select multiple columns'):
        selected_col = st.multiselect('Select preferred columns', df.columns)
        st.dataframe(df[selected_col])
    if st.checkbox("Head"):
        st.write(df.head())
    if st.checkbox('Tail'):
        st.write(df.tail())
    if st.checkbox('Null values'):
        st.write(df.isnull().sum())
    if st.checkbox('Data types'):
        st.write(df.dtypes)
    if st.checkbox('Random sample'):
        # sample() raises when asked for more rows than the frame has.
        st.write(df.sample(min(20, len(df))))
    if st.checkbox('Display correlations'):
        # Text columns make corr() raise on recent pandas.
        st.write(df.select_dtypes(include='number').corr())
    if st.checkbox('Summary'):
        st.write(df.describe(include='all').T)


def _nbp_visualization_section():
    """Render the chart builder for an uploaded CSV."""
    # Kept as module globals, as before, in case other code reads them.
    global numeric_columns
    global non_numeric_columns

    st.subheader("Data Visualization and Graphing")
    st.sidebar.subheader("File Upload")
    uploaded_file = st.sidebar.file_uploader(
        label="Upload your CSV file. (200MB max)", type=['csv'])

    df = None
    if uploaded_file is not None:
        st.success('Your data goes here')
        try:
            df = pd.read_csv(uploaded_file)
        except Exception:
            st.warning('Data not found')

    if df is not None:
        st.write(df)
        numeric_columns = list(df.select_dtypes(['float', 'int']).columns)
        non_numeric_columns = list(df.select_dtypes(['object']).columns)
        # None lets the user choose "no colour / no grouping".
        non_numeric_columns.append(None)

    chart_select = st.sidebar.selectbox(label="Select the chart type",
                                        options=[
                                            'Scatterplots', 'Lineplots',
                                            'Histogram', 'Boxplot',
                                            'Violinplot', 'Piechart'
                                        ])
    st.info('The Graphs generated will be displayed here')
    if df is None:
        # Nothing to plot yet; previously this fell through to NameErrors
        # that were only printed to the server console.
        return

    # Charts that share the "numeric X, numeric Y, colour" controls.
    xy_charts = {
        'Scatterplots': ("Scatterplot Settings", px.scatter),
        'Lineplots': ("Line Plot Settings", px.line),
        'Violinplot': ("Violin Plot Settings", px.violin),
    }
    try:
        if chart_select in xy_charts:
            heading, draw = xy_charts[chart_select]
            st.sidebar.subheader(heading)
            x_values = st.sidebar.selectbox('X axis', options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis', options=numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = draw(data_frame=df,
                        x=x_values,
                        y=y_values,
                        color=color_value)
        elif chart_select == 'Histogram':
            st.sidebar.subheader("Histogram Settings")
            x = st.sidebar.selectbox('Feature', options=numeric_columns)
            bin_size = st.sidebar.slider("Number of Bins",
                                         min_value=10,
                                         max_value=100,
                                         value=40)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            # The slider value was previously read but never used.
            plot = px.histogram(x=x,
                                data_frame=df,
                                color=color_value,
                                nbins=bin_size)
        elif chart_select == 'Boxplot':
            st.sidebar.subheader("Boxplot Settings")
            y = st.sidebar.selectbox("Y axis", options=numeric_columns)
            x = st.sidebar.selectbox("X axis", options=non_numeric_columns)
            color_value = st.sidebar.selectbox("Color",
                                               options=non_numeric_columns)
            plot = px.box(data_frame=df, y=y, x=x, color=color_value)
        else:  # 'Piechart'
            st.sidebar.subheader("Piechart Settings")
            x_values = st.sidebar.selectbox('X axis', options=numeric_columns)
            y_values = st.sidebar.selectbox('Y axis',
                                            options=non_numeric_columns)
            plot = px.pie(data_frame=df, values=x_values, names=y_values)
        # display the chart
        st.plotly_chart(plot)
    except Exception as e:
        # Tell the user instead of printing to the server console.
        st.error(f'Could not draw the chart: {e}')


def main():
    """Entry point of the Notebooker Pro Streamlit app.

    Shows a section selector in the sidebar and renders the chosen section:
    EDA, Visualization, Regression, Classification, Documentation or
    About Us. Takes no arguments and returns None.
    """
    activities = [
        'EDA', 'Visualization', 'Regression', 'Classification',
        'Documentation', 'About Us'
    ]
    option = st.sidebar.selectbox('Choose a section', activities)
    st.sidebar.markdown(
        '''Use this section for finding useful insights about your data,and feel free to use them in your notebooks

   🎯   Version : 1.0.2  ''')

    if option == 'EDA':
        _nbp_eda_section()

    elif option == 'Visualization':
        _nbp_visualization_section()

    elif option == 'Regression':
        st.subheader("Regression ML Model Builder")
        _nbp_model_section(LazyRegressor, _nbp_regression_plots)

    elif option == 'Classification':
        st.subheader("Classifier ML Model Builder")
        _nbp_model_section(LazyClassifier, _nbp_classification_plots)

    elif option == 'Documentation':
        st.subheader("How to use Notebooker Pro")
        st.markdown(
            '''The notebooker pro is a user-friendly software designed to help you make a good data science notebook in few steps.
Well, notebooker pro will not be making a notebook for you, but will provide you with all the data insights that you will need to put in your kernel.

The notebooker pro has been provided with 4 major sections:

i.  **EDA (Explanatory Data Analysis)** --> used to find important data and statistical insights from the uploaded files

ii. **Visualization** --> Used to perform data visualization with 5 basic important types of graphs

iii.**Regression** --> Loops through **30** different regression models and returns the complexity statistics of the result of regression modelling for your dataset for chosen seed values and size. The only thing to keep in mind while using this is that, the data must be fitting with a regression modelling. Datasets used for classification algorithm might generate vague results. So use a proper dataset. **[eg.: do not use iris,cancer,penguins etc. classifier dataset]**

iv. **Classification** --> Loops through **30** different classification models and returns the complexity statistics of the result of classification modelling for your dataset for chosen seed values and size. The only thing to keep in mind while using this is that, the data must be fitting with a classification modelling. Datasets used for non-classification algorithm might generate vague results. So use a proper dataset.

**Features:**

**Upload file** => Upload only csv files.

**Data split** => This is a linear slidebar, that will let you choose split ratio between 0 to 1

**Random seed** => Helps to randomize the data in training and testing data samples. You may change to get the best accuracy of for a particular model.
            ''')

    elif option == 'About Us':
        st.subheader("About Us 😊")
        st.markdown(
            '''This web application is made by Mainak Chaudhuri. He is a Computer Science and Engineering student of the SRM University, studying in the second year of B.Tech. The main idea of this application is to help beginners and data science enthusiasts chalk out a plan for preparing a good data science notebook, for college projects, online courses or to add in their portfolio. This application accepts a dataset from the user and displays useful insights about the data.

Additionally, it also helps the user visualize the data, choose the best supervised machine learning model (regression & classifaction handled separately) and decide the best suit depending on the dataset size,split and seed values which can be set by the user with the help of the side panel. This application claims to be the first of it's kind ever developed till date by a single developer and also has a serving history and positive reports from 180+ users.

👉   N.B. : This application is an intellectual property of Mainak Chaudhuri and hence holds a reserved copyright. Any form of illegal immitation of graphics, contents or documentation without prior permission of the owner if proved, can result in legal actions against the plagiarist.'''
        )
        st.success('For more info, feel free to contact @ : ')
        url = 'https://www.linkedin.com/in/mainak-chaudhuri-127898176/'
        if st.button('Mainak Chaudhuri'):
            # NOTE(review): ``webbrowser`` opens the tab on the machine that
            # runs the Streamlit server, which is the visitor's machine only
            # when the app is run locally.
            webbrowser.open_new_tab(url)
plotly_fig1 = px.bar(excluded_df, x=excluded_df['Department'], y=excluded_df['Average'], title='Average grades by department', labels=dict(x="Department", y="Average Grade")) plotly_fig1.update_traces(marker_color='rgb(221,55,55)') # Centre the plot title. plotly_fig1.update_layout( title={ 'text': 'Average grades by department', 'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top' }) st.plotly_chart(plotly_fig1) plotly_fig2 = px.bar(levels, x='Level', y='Average', hover_data=['Level', 'Average'], color='Module Count', labels={"Module Count": "Number of modules used"}, height=400, color_continuous_scale=[ "rgb(221,55,55)", "rgb(137,164,204)", "rgb(30,77,155)" ]) # Centre the plot title. plotly_fig2.update_layout(title={ 'text': 'Average grades by level',
args=[{ 'visible': [True, True] }, { 'title': 'Linear scale', 'yaxis': { 'type': 'linear', 'domain': [0.35, 1] } }]) ]), ) ]) layout = dict( updatemenus=updatemenus, title='Linear scale', width=900, height=700, autosize=False, yaxis1=dict(domain=[0.35, 1]), yaxis2=dict(domain=[0, .3]), ) fig.update_layout(layout) fig.add_trace(trace, row=1, col=1) fig.add_trace(trace2, row=2, col=1) st.plotly_chart(fig, use_container_width=False) # st.plotly_chart(go.Figure(go.Bar(x=table['date'], y=table['Δ'], name='diff. of COVID cases')))
def main():
    """Render the category-management dashboard for the page chosen in the sidebar.

    The listing snapshot CSV is read through a cached helper. Depending on
    the sidebar choice the function draws the home page, the ranking page
    (per-ASIN daily ranking pivot, a line chart for one ASIN, a date lookup
    and the raw rows) or the query page (multi-select filters on category,
    date and ASIN). The fourth menu entry has no page attached to it.
    """
    pages = ['主页', '排名', '查询', '其他']
    choice = st.sidebar.selectbox('工具箱', pages)

    @st.cache
    def load_data():
        # Rename the snapshot column to ``date`` and truncate it to whole days.
        listings = pd.read_csv('project_20_listings.csv')
        listings.rename(columns={'snapshotted_at': 'date'}, inplace=True)
        snapshot_days = pd.to_datetime(listings['date']).dt.date
        listings['date'] = pd.to_datetime(snapshot_days)
        return listings

    data = load_data()

    if choice == '主页':
        st.title('傲基2.0品类管理')
        st.header('品类详情')
        st.markdown('针对每个asin,日排名规则为:')
        st.markdown(''' 每日排名取当日排名的最高值 ''')

    elif choice == '排名':
        c1, c2 = st.beta_columns(2)
        category = st.selectbox('CategoryID:', data['category_id'].unique())
        in_category = data[data['category_id'] == category]

        # One row per ASIN, one column per day, keeping the max ranking per day.
        table = pd.pivot_table(
            in_category,
            values=['ranking'],
            index=['asin'],
            columns=['date'],
            aggfunc={'ranking': max},
        )
        daily_rank = table['ranking']
        st.write(daily_rank)

        with c1:
            chosed_asin = st.selectbox('Asin', in_category.asin.unique())
            st.write(daily_rank.loc[str(chosed_asin)])
        with c2:
            df = daily_rank.loc[str(chosed_asin)]
            fig = px.line(df, x=df.index, y=df, title=f'{chosed_asin}排名变化')
            st.subheader('当月排名变化情况')
            st.plotly_chart(fig)

        start_time = st.slider(
            '该品类下所有Asin的最高日排名',
            value=datetime(2021, 1, 28),
            format='MM/DD/YY',
        )
        if start_time not in daily_rank.columns:
            st.write('data is not yet available now')
        else:
            st.write(
                daily_rank[str(start_time)],
                'and change in data compared to previous date is:',
            )

        with st.beta_expander('原始数据详情', expanded=True):
            i = st.number_input('输入你想要看到的条数',
                                min_value=1,
                                value=50,
                                step=50)
            st.write(data.iloc[:i, :])

    elif choice == '查询':
        # Each selector only offers values that survive the previous filters.
        ci = st.multiselect('品类ID', data['category_id'].unique())
        in_categories = data['category_id'].isin(ci)

        newdate = st.multiselect('日期', data[in_categories]['date'].unique())
        on_dates = data['date'].isin(newdate)

        asin = st.multiselect(
            'ASIN', data[in_categories & on_dates]['asin'].unique())
        has_asin = data['asin'].isin(asin)

        st.write(data[in_categories & has_asin & on_dates])
def main() -> None:
    """Run the linear-regression exploration app.

    Once a CSV is uploaded the page shows, in order: a preview of the data,
    per-column type and missing-value summaries, descriptive statistics for a
    chosen numeric column, one of several plots for up to two chosen
    attributes, and finally a ``LinearRegression`` fitted on a 70/30
    train/test split together with its R² scores and parameter table.
    Nothing below the uploader is drawn until a file is provided.
    """
    st.title('APS Regressão Linear ')
    st.text(
        'Gabriel Oliveira Ramos do Nascimento RA: 21022939 \n'
        'Jackson do Nascimento Silva RA: 21022770 \n'
        'Laura Damaceno de Almeida RA: 20964736 \n'
        'Victor Hugo Kawabata Fuzaro RA: 20760102'
    )
    st.image('image.png', width=900)
    file = st.file_uploader('Escolha seu arquivo', type='csv')
    if file is None:
        return

    slider = st.slider('Quantidade de linhas', 0, 100)
    df = pd.read_csv(file)
    st.dataframe(df.head(slider))
    st.markdown('**Nome das colunas**')
    st.write(df.columns)
    st.markdown('**Número de linhas**')
    st.write(df.shape[0])
    st.markdown('**Número de colunas**')
    st.write(df.shape[1])

    # Per-column overview: name, dtype, missing count and missing percentage.
    exploracao = pd.DataFrame({
        'nomes': df.columns,
        'tipos': df.dtypes,
        'NA #': df.isna().sum(),
        'NA %': df.isna().sum() / df.shape[0] * 100
    })
    st.markdown('**Contagem dos tipos de dados**')
    st.write(exploracao.tipos.value_counts())
    st.markdown('**Nome das colunas do tipo int64**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'int64']['nomes']))
    st.markdown('**Nomes das colunas do tipo float64:**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'float64']['nomes']))
    st.markdown('**Nomes das colunas do tipo object:**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'object']['nomes']))
    st.markdown('**Tabela com coluna e percentual de dados faltantes :**')
    st.table(exploracao[exploracao['NA #'] != 0][['tipos', 'NA %']])
    st.markdown('**Descrição dos dados :**')
    st.table(df.describe())

    opcoes = df.columns
    aux = pd.DataFrame({"coluna": df.columns, "tipos": df.dtypes})
    colunas_numericas = list(aux[aux['tipos'] != 'object']['coluna'])

    st.subheader('Estatística descritiva')
    col = st.selectbox('Selecione a coluna', colunas_numericas)
    if col is not None:
        st.markdown('Selecione o que deseja analisar')
        serie = df[col]
        estatisticas = (
            ('Média', serie.mean),
            ('Mediana', serie.median),
            ('Desvio Padrão', serie.std),
            ('Kurtosis', serie.kurtosis),
            ('Skewness', serie.skew),
        )
        # Each checkbox is immediately followed by its value when ticked.
        for rotulo, calcular in estatisticas:
            if st.checkbox(rotulo):
                st.markdown(calcular())

    st.subheader('Visualização dos dados')
    selected_atributos = st.multiselect('Selecione os atributos', opcoes)
    type_visualize = st.selectbox('Selecione o tipo de visualização', [
        'selecione', 'boxplot', 'scatter plot', 'barchart', 'histograma',
        'Matriz de correlação'
    ])
    df.dropna(inplace=True)

    # Only offer the 'Country' hover column when the uploaded file has one;
    # a hard-coded name makes plotly raise for every other dataset.
    hover = ['Country'] if 'Country' in df.columns else None
    precisa_atributo = ('boxplot', 'scatter plot', 'barchart', 'histograma')

    if len(selected_atributos) > 2:
        st.markdown('Selecione no máximo 2 atributos')
    elif type_visualize in precisa_atributo and not selected_atributos:
        # Guard against indexing an empty selection.
        st.markdown('Selecione ao menos 1 atributo')
    elif type_visualize == 'barchart':
        st.bar_chart(df[selected_atributos[0]])
    elif type_visualize == 'boxplot':
        if len(selected_atributos) == 1:
            fig = px.box(df, y=selected_atributos[0], hover_data=hover)
        else:
            fig = px.box(df,
                         x=selected_atributos[0],
                         y=selected_atributos[1],
                         hover_data=hover)
        st.plotly_chart(fig)
    elif type_visualize == 'scatter plot':
        if len(selected_atributos) == 1:
            fig = px.scatter(df, x=selected_atributos[0], hover_data=hover)
        else:
            fig = px.scatter(df,
                             x=selected_atributos[0],
                             y=selected_atributos[1],
                             hover_data=hover)
        st.plotly_chart(fig, use_container_width=True)
    elif type_visualize == 'histograma':
        sns.distplot(df[selected_atributos[0]])
        st.pyplot()
    elif type_visualize == 'Matriz de correlação':
        # Restrict to numeric/bool columns explicitly: recent pandas raises on
        # object columns instead of silently dropping them.
        st.write(df.select_dtypes(include=['number', 'bool']).corr())

    st.markdown('**Regressão Linear**')
    Y = st.selectbox('Selecione a variável Y', opcoes)
    x = st.multiselect('Selecione a variável X', opcoes)
    if Y is not None and len(x) >= 1:
        modelo = LinearRegression()
        X_train, X_test, y_train, y_test = train_test_split(df[x],
                                                            df[Y],
                                                            test_size=0.3,
                                                            random_state=2811)
        modelo.fit(X_train, y_train)
        # round() works whether the score is a Python float or a NumPy scalar.
        st.text("R quadrado = {}".format(
            round(modelo.score(X_train, y_train), 2)))
        y_predict_train = modelo.predict(X_train)
        lm = modelo.predict(X_test)
        st.text("R quadrado de teste = {}".format(
            round(metrics.r2_score(y_test, lm), 2)))
        sns.regplot(x=y_predict_train, y=y_train)
        st.pyplot()

        st.markdown('**Formula da Regressão Linear**')
        st.image('formula.png', width=500)
        # The values are laid out as [intercept, coef_1, ..., coef_n], so the
        # labels must follow the same order. A new list is built so the
        # widget's selection list is not mutated.
        nomes_parametros = ['Intercept'] + list(x)
        st.table(
            pd.DataFrame(data=np.append(modelo.intercept_, modelo.coef_),
                         index=nomes_parametros,
                         columns=['Parametros']))
def earning_chart(input_ticker, earning_df, ea_df, price_df):
    """Draw the EPS-versus-price chart and the reported-EPS box plot.

    The first figure stacks four EPS series from ``earning_df`` as bars on
    the primary axis against the ``Close`` column of ``price_df`` as a line
    on the secondary axis, with a range selector and range slider on the
    x axis. The second figure is a box plot of ``reportedEPS``. Both are
    sent to Streamlit. ``ea_df`` is accepted but not used here.
    """
    # Share price and EPS
    report_dates = earning_df['reportedDate']  # x axis follows EPS report dates
    close = price_df['Close']
    eps_columns = ['reportedEPS', 'estimatedEPS', 'surprise', 'ttmEPS']

    fig = make_subplots(specs=[[{'secondary_y': True}]])
    for column, color in zip(eps_columns, marker_colors):
        eps_bar = go.Bar(name=column,
                         x=report_dates,
                         y=earning_df[column],
                         marker_color=color)
        fig.add_trace(eps_bar, secondary_y=False)

    price_line = go.Scatter(mode='lines',
                            name='Close',
                            x=price_df.index,
                            y=close,
                            text=close,
                            textposition='top center',
                            marker_color='rgb(0,0,0)')
    fig.add_trace(price_line, secondary_y=True)
    fig.update_traces(texttemplate='%{text:.3s}')

    # The first call styles both y axes; the second then overrides the primary.
    fig.update_yaxes(title_text='Close',
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True,
                     tickprefix="$")
    fig.update_yaxes(title_text='EPS',
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     tickprefix="$",
                     secondary_y=False)

    chart_title = dict(text='(' + input_ticker + ') EPS & Price', x=0.5, y=0.9)
    fig.update_layout(title=chart_title,
                      titlefont_size=15,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(template="myID")

    range_buttons = [
        dict(count=6, label="6m", step="month", stepmode="backward"),
        dict(count=1, label="YTD", step="year", stepmode="todate"),
        dict(count=1, label="1y", step="year", stepmode="backward"),
        dict(count=5, label="5y", step="year", stepmode="backward"),
        dict(count=10, label="10y", step="year", stepmode="backward"),
        dict(step="all"),
    ]
    date_axis = go.layout.XAxis(rangeselector=dict(buttons=range_buttons),
                                rangeslider=dict(visible=True),
                                type="date")
    top_right_legend = dict(orientation="h",
                            yanchor="bottom",
                            y=1.02,
                            xanchor="right",
                            x=1)
    fig.update_layout(showlegend=True, legend=top_right_legend, xaxis=date_axis)
    st.plotly_chart(fig)

    # Distribution of the reported EPS values
    stats_title = dict(text='(' + input_ticker + ') reportedEPS Statistics',
                       x=0.5,
                       y=0.9)
    fig2 = go.Figure()
    eps_box = go.Box(x=earning_df.loc[:, 'reportedEPS'],
                     name='reportedEPS',
                     boxpoints='all',
                     marker_color='indianred',
                     boxmean='sd',
                     jitter=0.3,
                     pointpos=-1.8)
    fig2.add_trace(eps_box)
    fig2.update_layout(title=stats_title,
                       titlefont_size=15,
                       legend=dict(orientation="h"),
                       template=template)
    st.plotly_chart(fig2)
def _kor_eps_price_chart(com_name, frame, period):
    """Draw one EPS-bar / price-line chart; ``period`` labels it ('TTM' or 'Annual')."""
    titles = dict(text='(' + com_name + ') ' + period + ' EPS & Price',
                  x=0.5,
                  y=0.9)
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    fig.add_trace(go.Bar(name='EPS',
                         x=frame.index,
                         y=frame['EPS'],
                         marker_color=marker_colors[1]),
                  secondary_y=False)
    fig.add_trace(go.Scatter(mode='lines',
                             name='Close',
                             x=frame.index,
                             y=frame['Price'],
                             text=frame['Price'],
                             textposition='top center',
                             marker_color='rgb(0,0,0)'),
                  secondary_y=True)
    fig.update_traces(texttemplate='%{text:.3s}')
    # The first call styles both axes; the second overrides the primary axis.
    fig.update_yaxes(title_text='Close',
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True)
    fig.update_yaxes(title_text=period + ' EPS',
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     secondary_y=False)
    fig.update_layout(title=titles,
                      titlefont_size=15,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(template="myID")
    st.plotly_chart(fig)


def kor_earning_chart(input_ticker, com_name, ttm_df, annual_df):
    """Render the six earnings charts for one company.

    In order: TTM EPS vs price, annual EPS vs price, a box plot of TTM EPS,
    TTM PER/PBR/ROE, margins (OPM, NPM) vs ROE, and annual DPS vs dividend
    yield. Every figure is sent to Streamlit with ``st.plotly_chart``.

    Args:
        input_ticker: Ticker text shown in the PER/PBR/ROE and margin titles.
        com_name: Company name shown in every chart title.
        ttm_df: Frame indexed by period; the columns read here are ``EPS``,
            ``Price``, ``PER``, ``PBR``, ``ROE``, ``OPM`` and ``NPM``.
        annual_df: Frame indexed by period; the columns read here are
            ``EPS``, ``Price``, ``DPS`` and ``DY``.
    """
    # Share price against TTM EPS, then against annual EPS.
    _kor_eps_price_chart(com_name, ttm_df, 'TTM')
    _kor_eps_price_chart(com_name, annual_df, 'Annual')

    # Distribution of the TTM EPS values.
    fig2 = go.Figure()
    titles = dict(text='(' + com_name + ') EPS Statistics', x=0.5, y=0.9)
    fig2.add_trace(
        go.Box(x=ttm_df.loc[:, 'EPS'],
               name='EPS',
               boxpoints='all',
               marker_color='indianred',
               boxmean='sd',
               jitter=0.3,
               pointpos=-1.8))
    fig2.update_layout(title=titles,
                       titlefont_size=15,
                       legend=dict(orientation="h"),
                       template=template)
    st.plotly_chart(fig2)

    # PER, PBR and ROE trend.
    x_data = ttm_df.index
    titles = dict(text=com_name + '(' + input_ticker + ') TTM PER PBR & ROE',
                  x=0.5,
                  y=0.85)
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    fig.add_trace(go.Scatter(mode='lines+markers+text',
                             name='PER',
                             x=x_data,
                             y=ttm_df['PER'],
                             text=ttm_df['PER'],
                             textposition='top center',
                             marker_color=marker_colors[0]),
                  secondary_y=False)
    fig.add_trace(go.Scatter(mode='lines+markers+text',
                             name='PBR',
                             x=x_data,
                             y=ttm_df['PBR'],
                             text=ttm_df['PBR'],
                             textposition='top center',
                             marker_color=marker_colors[1]),
                  secondary_y=True)
    fig.add_trace(go.Bar(name='ROE',
                         x=x_data,
                         y=ttm_df['ROE'],
                         text=ttm_df['ROE'],
                         textposition='outside',
                         marker_color=marker_colors[2]),
                  secondary_y=False)
    fig.update_traces(texttemplate='%{text:.3s}')
    # PER and ROE share the primary axis. Two separate title updates would
    # leave only the last one visible, so both are named in a single title.
    fig.update_yaxes(title_text='PER / ROE', secondary_y=False)
    fig.update_yaxes(title_text='PBR', secondary_y=True)
    fig.update_yaxes(showticklabels=True, showgrid=False, zeroline=True)
    fig.update_layout(title=titles,
                      titlefont_size=15,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(template="myID")
    st.plotly_chart(fig)

    # ROE against the margin rates.
    titles = dict(text=com_name + '(' + input_ticker + ') Margin & ROE',
                  x=0.5,
                  y=0.85)
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    for column, color in zip(['OPM', 'NPM'], marker_colors):
        fig.add_trace(go.Scatter(mode='lines+markers+text',
                                 name=column,
                                 x=x_data,
                                 y=ttm_df[column],
                                 text=ttm_df[column],
                                 textposition='top center',
                                 marker_color=color),
                      secondary_y=True)
    for column, color in zip(['ROE'], marker_colors):
        fig.add_trace(go.Bar(name=column,
                             x=x_data,
                             y=ttm_df[column],
                             text=ttm_df[column],
                             textposition='outside',
                             marker_color=color),
                      secondary_y=False)
    fig.update_traces(texttemplate='%{text:.3s}')
    # Series.max() skips NaN; the builtin max() can return NaN (or miss the
    # true maximum) when the column has gaps, which breaks the axis range.
    roe_peak = ttm_df['ROE'].max()
    opm_peak = ttm_df['OPM'].max()
    fig.update_yaxes(title_text='ROE',
                     range=[0, roe_peak * 2],
                     secondary_y=False)
    fig.update_yaxes(title_text='Margin Rate',
                     range=[-opm_peak, opm_peak * 1.2],
                     secondary_y=True)
    fig.update_yaxes(showticklabels=True,
                     showgrid=False,
                     zeroline=True,
                     ticksuffix="%")
    fig.update_layout(title=titles,
                      titlefont_size=15,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(template="myID")
    st.plotly_chart(fig)

    # Dividends: annual DPS bars against the dividend-yield line.
    titles = dict(text='(' + com_name + ') Annual DPS & DY', x=0.5, y=0.9)
    x_data = annual_df.index
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    fig.add_trace(go.Bar(name='DPS',
                         x=x_data,
                         y=annual_df['DPS'],
                         marker_color=marker_colors[0]),
                  secondary_y=False)
    fig.add_trace(go.Scatter(mode='lines',
                             name='Dividend Yeild',
                             x=annual_df.index,
                             y=annual_df['DY'],
                             text=annual_df['DY'],
                             textposition='top center',
                             marker_color=marker_colors[1]),
                  secondary_y=True)
    fig.update_traces(texttemplate='%{text:.3s}')
    fig.update_yaxes(title_text='Dividend Yeild',
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True)
    fig.update_yaxes(title_text='Annual DPS',
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     secondary_y=False)
    fig.update_layout(title=titles,
                      titlefont_size=15,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(template="myID")
    st.plotly_chart(fig)
def plot_raw_data():
    """Plot the ``Open`` and ``Close`` columns of ``data`` against ``Date``.

    Both series go on one figure that gets a title and a visible range
    slider on the x axis before being handed to Streamlit.
    """
    series = (('Open', "stock_open"), ('Close', "stock_close"))
    fig = go.Figure()
    for column, label in series:
        line = go.Scatter(x=data['Date'], y=data[column], name=label)
        fig.add_trace(line)
    fig.layout.update(title_text='Time Series data with Rangeslider',
                      xaxis_rangeslider_visible=True)
    st.plotly_chart(fig)
def run_the_analysis():
    """Show the monthly spending report and compare it with the budget.

    Reads ``myrecord.csv`` and ``mybudget.csv`` from ``path``, lets the user
    pick a month and a category, draws the per-category pie chart, the bar
    chart for the chosen category and the spent-versus-budgeted bars, and
    finally lists every category whose spent share of its budget is at least
    the share of the month that has already elapsed.

    The bare expression statements (``stabledf`` and friends) are kept on
    purpose: Streamlit renders them on the page.
    """
    caching.clear_cache()

    def load_metadata():
        return pd.read_csv(os.path.join(path, r'myrecord.csv'))

    stabledf = load_metadata()
    stabledf['Date'] = pd.to_datetime(stabledf['Date'])
    stabledf = stabledf.set_index('Date')
    stabledf = stabledf.sort_index()
    stabledf

    monthtoview = st.selectbox('which month to view?',
                               stabledf.index.month.drop_duplicates())
    in_month = stabledf.index.month == monthtoview

    st.subheader('What you have spent this month?')
    thismonthdf = stabledf.loc[in_month].groupby(['Category']).sum()
    thismonthdf
    # Read the total from the 'Amount' column by name rather than by integer
    # position on a label-indexed Series.
    st.write('In total is $', round(thismonthdf['Amount'].sum(), 2))

    fig = px.pie(thismonthdf, values='Amount', names=thismonthdf.index)
    st.plotly_chart(fig)

    category_filter = st.selectbox('Which category to look deeper into?',
                                   stabledf['Category'].drop_duplicates())
    filterdata = stabledf.loc[in_month &
                              (stabledf['Category'] == category_filter)]
    st.subheader('selected')
    st.bar_chart(filterdata['Amount'])

    st.subheader('Compare with budgeting analysis')

    def load_metabudget():
        return pd.read_csv(os.path.join(path, r'mybudget.csv'))

    budgetdf = load_metabudget()
    analysisdf = thismonthdf.merge(budgetdf,
                                   left_index=True,
                                   right_on='Category',
                                   suffixes=('_spent', '_budgeted'))
    analysisdf = analysisdf.set_index('Category')
    analysisdf

    st.subheader("how you've spent this month")
    fig2 = {
        'data': [
            go.Bar(x=analysisdf.index,
                   y=analysisdf["Amount_budgeted"],
                   name="Amount_budgeted"),
            go.Bar(x=analysisdf.index,
                   y=analysisdf["Amount_spent"],
                   name="Amount_spent")
        ],
        'layout': go.Layout(barmode='overlay')
    }
    st.plotly_chart(fig2)

    st.subheader('Warning Section')
    # The chosen category may have no rows in the chosen month; fall back to
    # the month's own rows so the year lookup cannot hit an empty index.
    year_source = filterdata.index if len(filterdata.index) else stabledf.index[in_month]
    currentyear = year_source.year[0]

    # None of these depend on the row, so compute them once.
    month_start = datetime(currentyear, monthtoview, 1)
    last_date_of_month = month_start + relativedelta(months=1, days=-1)
    delta = datetime.today() - month_start
    daysthismonth = last_date_of_month - month_start
    elapsed_share = delta / daysthismonth

    for index, row in analysisdf.iterrows():
        spent_share = row['Amount_spent'] / row['Amount_budgeted']
        if elapsed_share <= spent_share:
            st.write('Category', index, 'is over the proportion limit!!!')
ratings_all = get_ratings_all() ratings_df = pd.DataFrame(ratings_all) ratings_df = ratings_df.drop('game', 1).assign(**ratings_df.game.apply(pd.Series)) ratings_df = ratings_df.drop('earnings', 1).assign(**ratings_df.earnings.apply(pd.Series)) fig_all = px.scatter(ratings_df, x='downloads', y='revenue', hover_name='name', hover_data=['platform', 'publisher'], color='genre', template='plotly_dark', title="All game revenue vs downloads with genre colormap") st.plotly_chart(fig_all) st.write( "From the combined chart above, you can interact with the following chart to focus on the top 10 ranking type and gaming platform:" ) left_column, right_column = st.beta_columns(2) with right_column: rank_type = st.sidebar.radio('Sorting rank type', ('top free', 'top paid', 'top grossing')) with left_column: os_type = st.sidebar.radio('Sorting platform', ('android', 'iOS')) ra_search = ratings_df.loc[(ratings_df['rank_type'] == rank_type) & (ratings_df['platform'] == os_type)] fig_r = px.scatter(
def main():
    """Drive the HDB resale-price app from the sidebar menu.

    Pages:
        * ``EDA`` - narrative text plus four trend charts built from the
          full dataset.
        * ``Interactive Charts`` - a 3D hexagon map for one year and median
          price bars by town / flat type and by MRT station.
        * ``Prediction`` - looks up a postcode in the deployment table,
          collects the remaining inputs, runs the saved model once and shows
          the valuation and the block's past transactions.
        * ``About`` - author contacts.

    ``load_data`` and ``load_prediction_models`` are helpers defined
    elsewhere in this file.
    """
    sqft_per_sqm = 10.764  # conversion factor used for dollars per square foot

    # Menu
    menu = ['EDA', 'Interactive Charts', 'Prediction', 'About']
    choices = st.sidebar.selectbox('Select Menu', menu)

    # load dataframe
    data = load_data('data/data.csv')

    # we established from EDA that year post 2013 is more stable and more
    # reflective of current market trends
    data2013 = data.query('year_sold >=2013')

    # When 'EDA' is selected at the menu.
    if choices == 'EDA':
        st.title('EDA')
        st.header("Project Title : HDB Price Prediction ML App")
        st.subheader(" Problem Statement")
        st.markdown(
            'How do flat buyers know if they have snatched a good deal?'
            ' Alternatively, how do flat sellers benchmark their property reasonably?'
            ' In order to help flat buyers and flat sellers make an informed decision, we decided to find out more about resale flat prices in Singapore.'
            ' Ultimately, we want to predict the price of resale flats in Singapore.'
        )
        st.markdown(
            'Complete notebook can be found [here](https://nbviewer.jupyter.org/github/andrewng88/hdb/blob/master/2_Exploratory_Data_Analysis.ipynb)'
        )
        st.subheader("The Data")
        st.markdown(
            'Obtained from [Data.gov.sg](http://data.gov.sg/dataset/resale-flat-prices)'
            ' the dataset is from **1990 to 2019**.')
        if st.checkbox("Show Summary of the Dataset"):
            st.write(data.describe())

        # display overall hdb price trend chart
        table1 = data.groupby("year_sold")["resale_price"].median().reset_index()
        resale_price = px.line(table1, x="year_sold", y="resale_price")
        resale_price.update_layout(
            title_text='HDB Resale Price trend (1990 - 2019)',
            template='ggplot2')
        st.plotly_chart(resale_price)

        # chart commentary
        st.markdown(
            'The decline in resale price and sudden surge in the number of units sold following 1997 is due to the 1997'
            ' [Asian financial crisis](https://www.todayonline.com/singapore/divergent-hdb-resale-private-home-price-trends-will-not-last).'
            ' With regards to the sharp spike in 2007 is because HDB has stopped Walk-In-Selection and replace it with Sale of Balance Flats'
            ' which is only twice per year and hence everyone went with the Resale'
        )

        # display overall hdb transactions trend chart
        table2 = data.groupby("year_sold")["resale_price"].count().reset_index()
        table2 = table2.rename(columns={"resale_price": "number_of_resale"})
        resale_transaction = px.line(table2,
                                     x="year_sold",
                                     y="number_of_resale")
        resale_transaction.update_layout(
            title_text='HDB Resale Transactions between (1990 - 2019)',
            template='ggplot2')
        st.plotly_chart(resale_transaction)

        # chart commentary
        st.markdown(
            'Implementation of the revised [cooling measures](https://www.srx.com.sg/cooling-measures) to cool the residential market from 2010 onwards'
            ' led to the drop in resale price and low number of units sold during this period.Specifically the lowering of LTV(Loan-To-Value) from '
            ' 90% to 80% - meaning buyers have to pay more initally.')

        # display overall dollar per square foot based on flat type
        data['dollar_psf'] = data['resale_price'] / (data['floor_area_sqm'] *
                                                     sqft_per_sqm)
        # Select the column with a plain label: a tuple key is no longer
        # accepted by recent pandas, and this yields flat column names.
        table3 = data.groupby(["year_sold", 'flat_type'
                               ])["dollar_psf"].median().reset_index()
        dollar_per_sq_f = px.line(table3,
                                  x="year_sold",
                                  y="dollar_psf",
                                  color='flat_type')
        dollar_per_sq_f.update_layout(
            title_text=
            'Median Dollar Per Square Feet between 1990 and 2019 based on flat type',
            template='ggplot2')
        st.plotly_chart(dollar_per_sq_f)

        # chart commentary
        st.markdown(
            'Similar trend if we break down based on flat type, the median went up by two fold from 2007 to 2013 and gradually'
            ' went down because of additional cooling measures')

        # display overall dollar per square foot based on storey
        table4 = data.groupby(["year_sold", 'storey_range'
                               ])["dollar_psf"].median().reset_index()
        median_storey = px.line(table4,
                                x="year_sold",
                                y="dollar_psf",
                                color='storey_range')
        median_storey.update_layout(
            title_text=
            'Median Dollar Per Square Feet between 1990 and 2019 based on storey',
            template='ggplot2')
        st.plotly_chart(median_storey)
        st.markdown(
            'Similar trend if we break down based on storey, but for high storey more than 40, price is still climbing.'
            ' We can also notice that high rise flats ( > 30 storeys ) starts from around 2005 onwards( less 3 years)'
        )
        st.markdown(
            '**We decided to work with data from 2013**. This is because the 1997 Asian financial crisis is a once off event and does not provide an '
            ' accurate reflection of the current situation.In addition, with the cooling measures still in place, using data from 2013'
            ' will ensure consistency in this aspect.')
        st.subheader(
            'Complete notebook can be found [here](https://nbviewer.jupyter.org/github/andrewng88/hdb/blob/master/2_Exploratory_Data_Analysis.ipynb)'
        )

    # When 'Interactive Charts' is selected at the menu.
    if choices == 'Interactive Charts':
        st.title('Interactive Charts')

        # 3D map component
        st.subheader("HDB Transactions Visualized using 3D")
        # from 1990 to 2019, defaults to 2019
        year = st.slider('Year to look at', 1990, 2019, 2019)
        data = data[data['year_sold'] == year]
        st.markdown("HDB transactions in **%i**" % (year))
        midpoint = (np.average(data["latitude"]),
                    np.average(data["longitude"]))
        st.write(
            pdk.Deck(
                map_style="mapbox://styles/mapbox/light-v9",
                # centre the view on the mean coordinates of that year's rows
                initial_view_state={
                    "latitude": midpoint[0],
                    "longitude": midpoint[1],
                    "zoom": 11,
                    "pitch": 50,
                },
                # one hexagon layer built from the selected year's coordinates
                layers=[
                    pdk.Layer("HexagonLayer",
                              data=data[['year_sold', 'latitude',
                                         'longitude']],
                              get_position=["longitude", "latitude"],
                              auto_highlight=True,
                              radius=100,
                              extruded=True,
                              pickable=True,
                              elevation_scale=4,
                              elevation_range=[0, 1000]),
                ],
            ))

        # displays the Median price by Flat type
        st.subheader('View HDB Median price by Flat type')
        flat_type_values = sorted(data['flat_type'].unique())
        flat_type_values.insert(0, 'ALL')
        flat_option = st.selectbox("Flat_type", flat_type_values)
        sort_option = st.radio("Sort by", ("Ascending", "Descending"))
        flat_type_display_text = f'<sup>You selected {flat_option} and {sort_option}</sup>'
        st.markdown(flat_type_display_text, unsafe_allow_html=True)

        # NOTE(review): each label maps to the opposite ``ascending`` flag,
        # presumably because horizontal bars are drawn bottom-up - confirm.
        sort_option_dict = {
            'Ascending': False,
            'Descending': True,
        }
        # 'ALL' and a single flat type differ only in the rows they aggregate.
        if flat_option == 'ALL':
            flat_rows = data
        else:
            flat_rows = data[data['flat_type'] == flat_option]
        figure_to_plot = flat_rows.groupby(
            ['town'])['resale_price'].median().reset_index().sort_values(
                by=['resale_price'],
                ascending=sort_option_dict[sort_option])

        fig_median = px.bar(figure_to_plot,
                            x='resale_price',
                            y='town',
                            orientation="h",
                            height=600,
                            template='ggplot2')
        fig_median_title = f'HDB Median price for {flat_option} flats in {sort_option} order'
        fig_median.update_layout(title_text=fig_median_title)
        st.plotly_chart(fig_median)

        # displays the Median price by MRT
        st.subheader('View HDB Median price by MRT')
        mrt_values = sorted(data2013['mrt'].unique())
        mrt_values.insert(0, 'ALL')
        mrt_option = st.selectbox("MRT", mrt_values)
        mrt_display_text = f'<sup>You selected {mrt_option}</sup>'
        st.markdown(mrt_display_text, unsafe_allow_html=True)

        if mrt_option == 'ALL':
            mrt_all = data2013.query('nearest_mrt_distance <1').groupby(
                ['mrt'])['resale_price'].median().reset_index().sort_values(
                    by=['resale_price'])
            fig_median = px.bar(mrt_all,
                                x='resale_price',
                                y='mrt',
                                orientation='h',
                                height=600,
                                template='ggplot2')
            st.write(mrt_all)
        else:
            mrt = data2013[data2013['mrt'] == mrt_option]
            mrt = mrt.query('nearest_mrt_distance <1').groupby([
                'mrt', 'flat_type'
            ])['resale_price'].median().reset_index().sort_values(
                by=['resale_price']).drop('mrt', axis=1)
            fig_median = px.bar(mrt,
                                x='flat_type',
                                y='resale_price',
                                height=400,
                                template='ggplot2')

        fig_median_title = f'HDB Median price for HDB flats near {mrt_option}'
        fig_median.update_layout(title_text=fig_median_title)
        st.plotly_chart(fig_median)

    # When 'Prediction' is selected at the menu.
    if choices == 'Prediction':
        st.subheader('Predictions')

        # load the unique database for speed
        df_unique_deploy = load_data('data/df_unique_deploy.csv')

        # obtain Postcode input from end user
        input_postcode = st.text_input("Postcode : ", 560216)
        # Free text may not be a number; treat that as an unknown postcode so
        # the user gets the warning below instead of a traceback.
        try:
            postcode = int(input_postcode)
        except ValueError:
            postcode = None

        postcode_list = df_unique_deploy['postcode'].unique().tolist()

        # we proceed with HDB transaction prediction, if the postcode is in the list
        if postcode in postcode_list:
            # Every lookup below reads the same rows, so filter only once.
            block_rows = df_unique_deploy[df_unique_deploy['postcode'] ==
                                          postcode]

            def first_value(column):
                # First distinct value recorded for this postcode.
                return block_rows[column].unique().tolist()[0]

            input_postcode_results = f"Postcode is **{input_postcode}** "

            # auto retrieve the flat_type for selection based on postcode
            flat_type = st.selectbox(
                "The flat_type", block_rows['flat_type'].unique().tolist())
            flat_type_results = f"Flat Type is **{flat_type}**."

            # auto retrieve the flat_model for selection based on postcode
            flat_model = st.selectbox(
                "The flat_model", block_rows['flat_model'].unique().tolist())
            flat_model_results = f"Flat Model is **{flat_model}**."

            # auto retrieve town based on postcode
            town = first_value('town')
            town_results = f" and it is located in **{town }** town ."

            # storey has to come from the end user
            storey = st.slider("Storey level : ", 1, 50, 6)
            storey_results = f"Storey is **{storey}**."

            # auto retrieve floor_area_sqm for selection based on postcode
            floor_area_sqm = st.selectbox(
                "Floor_area_sqm",
                block_rows['floor_area_sqm'].unique().tolist())
            area_results = f"Area is **{floor_area_sqm }**."

            # calculate remaining lease = start year + 99 - current year
            today = date.today()
            year_sold = today.year
            month_sold = today.month
            lease_commence_date = block_rows['lease_commence_date'].tolist()[0]
            remaining_lease = int(lease_commence_date) + 99 - year_sold
            remaining_lease_results = f"Remaining lease is **{remaining_lease}** years ."

            # distances are looked up from the postcode
            nearest_mrt_distance = first_value('nearest_mrt_distance')
            nearest_mrt_distance_results = f"MRT is **{nearest_mrt_distance:.2f}** km away."

            CBD_distance = first_value('CBD_distance')
            cbd_distance_results = f"CBD is **{CBD_distance:.2f}** km away."

            nearest_mall_distance = first_value('nearest_mall_distance')
            nearest_mall_distance_results = f"Nearest Mall is **{nearest_mall_distance:.2f}** km away."

            nearest_school_distance = first_value('nearest_school_distance')
            nearest_school_distance_results = f"Nearest school is **{nearest_school_distance:.2f}** km away."

            # consolidate all data for prediction
            list_columns = [
                'floor_area_sqm', 'year_sold', 'month_sold', 'remaining_lease',
                'nearest_mrt_distance', 'CBD_distance',
                'nearest_mall_distance', 'nearest_school_distance', 'storey',
                'town', 'flat_type', 'flat_model'
            ]
            sample_data = pd.DataFrame([[
                floor_area_sqm, year_sold, month_sold, remaining_lease,
                nearest_mrt_distance, CBD_distance, nearest_mall_distance,
                nearest_school_distance, storey, town, flat_type, flat_model
            ]],
                                       columns=list_columns)

            # load model
            predictor = load_prediction_models('data/rf.sav')

            # display data input
            if st.checkbox('Verbose ON/OFF:'):
                st.markdown(input_postcode_results + town_results)
                st.markdown(flat_type_results)
                st.markdown(flat_model_results)
                st.markdown(storey_results)
                st.markdown(area_results)
                st.markdown(remaining_lease_results)
                st.markdown(nearest_mrt_distance_results)
                st.markdown(cbd_distance_results)
                st.markdown(nearest_mall_distance_results)
                st.markdown(nearest_school_distance_results)
                st.write('Data collated for prediction:')
                st.write(sample_data)

            # Predict once and take the single element explicitly rather than
            # converting a whole array with int().
            predicted_price = int(predictor.predict(sample_data)[0])
            # prefix $ to the integer prediction
            prediction = "{} {}".format('$', predicted_price)
            st.subheader('HDB valuation:')
            st.success(prediction)

            # display other HDB data from the same block
            st.subheader(
                "Other transactions from 2013 onwards(sorted by latest transaction)"
            )
            history = data2013[data2013['postcode'] == postcode].sort_values(
                by='month', ascending=False)
            # 'dollar_psf' is only added to the data on the EDA page, so
            # derive it here when this page is opened without it.
            if 'dollar_psf' not in history.columns:
                history = history.assign(
                    dollar_psf=history['resale_price'] /
                    (history['floor_area_sqm'] * sqft_per_sqm))
            st.dataframe(history[[
                'resale_price', 'dollar_psf', 'month', 'flat_type',
                'flat_model', 'storey_range', 'lease_commence_date',
                'floor_area_sqm'
            ]])

        # message to display if Postcode does not exist
        else:
            st.warning('Please input valid Postcode')

    if choices == 'About':
        st.header('About')
        st.subheader('Project by:')
        st.markdown('**Andrew Ng** [email protected]')
        st.markdown('https://www.linkedin.com/in/sc-ng-andrew/')
        st.markdown('**Lau Lee Ling** [email protected]')
        st.markdown('https://www.linkedin.com/in/lauleeling/')
st.subheader("Distribuição de imóveis por preço")

# Let the user pick the price range to display.
faixa_valores = st.slider("Faixa de preço", float(data.MEDV.min()), 150.,
                          (10.0, 100.0))

# Keep only the rows whose MEDV falls inside the selected range.
dados = data[data['MEDV'].between(left=faixa_valores[0],
                                  right=faixa_valores[1])]

# Plot the distribution of the filtered prices.
f = px.histogram(dados, x="MEDV", nbins=100, title="Distribuição de Preços")
f.update_xaxes(title="MEDV")
f.update_yaxes(title="Total Imóveis")
st.plotly_chart(f)

st.sidebar.subheader("Defina os atributos do imóvel para predição")

# Collect one user-supplied value per model attribute; each numeric input
# defaults to the mean of its own column.
crim = st.sidebar.number_input("Taxa de Criminalidade",
                               value=data.CRIM.mean())
# Fixed: the default used to be data.CRIM.mean() (copy-paste slip), which
# pre-filled the business-acres input with the crime-rate mean.
# NOTE(review): assumes the dataset exposes an INDUS column like the other
# upper-case attributes used here (CRIM, NOX, MEDV) -- confirm.
indus = st.sidebar.number_input("Proporção de Hectares de Negócio",
                                value=data.INDUS.mean())
chas = st.sidebar.selectbox("Faz limite com o rio?", ("Sim", "Não"))

# Convert the yes/no answer into the binary flag.
chas = 1 if chas == "Sim" else 0
nox = st.sidebar.number_input("Concentração de óxido nítrico",
                              value=data.NOX.mean())
"Select countries", myData.countryNames.tolist(), myData.getTopDailyNewCasesByCountry("confirmed").index.tolist()) st.latex(myData.getCumulativeDataSummary(countryNameOptions)) ############################################################################# st.markdown("""---""") daysOption = st.radio("", ("last 45 days", "last 60 days", "all days"), 0) st.write( '<style>div.Widget.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True) myData.setNumDays(option=daysOption) st.plotly_chart( myData.getTopCountriesNewCasesGraph(option="confirmed", numCountries=5)) st.plotly_chart( myData.getTopCountriesNewCasesGraph(option="deaths", numCountries=5)) #st.plotly_chart(myData.getTopCountriesActivePercentGraph(numCountries=5,numDays=45)) ############################################################################# st.markdown("""---""") countsOption = st.radio("Select an option", ("confirmed", "active", "recovered", "deaths", "activeRatio", "recoveredRatio", "deathsRatio"), 2) st.plotly_chart(myData.getGlobalCountsMap(countsOption)) st.plotly_chart(myData.getGlobalCountsScatterPlot(countsOption))
def _show_player_profile(user_input_player):
    """Render the profile table and optional bar chart for one player query."""
    player_name = find_name(user_input_player, ipl=True)
    if player_name is None:
        st.markdown('**' + user_input_player + '** ' + " is not found.")
        return

    bat_bowl = st.sidebar.selectbox(label="Batting/Bowling Profile",
                                    options=("bat", "bowl"))
    bat = bat_bowl != 'bowl'
    # Column pre-selected on the y-axis when the chart is enabled.
    default_yaxis = 'Runs' if bat else 'Wickets'

    year_from = st.sidebar.number_input("Year from",
                                        min_value=2008,
                                        max_value=2021,
                                        value=2008,
                                        step=1)
    year_to = st.sidebar.number_input("Year to",
                                      min_value=2008,
                                      max_value=2021,
                                      value=2021,
                                      step=1)
    visualize = st.sidebar.checkbox(label="Visualize", value=False)

    st.markdown('**' + player_name + '**')
    df = get_player_profile(player_name,
                            batsman=bat,
                            year_from=year_from,
                            year_to=year_to,
                            ipl=True)
    st.table(df)

    if not visualize:
        return

    numeric_cols = list(df.select_dtypes(include=np.number).columns.values)
    if not numeric_cols:
        # Nothing can be plotted; previously this path raised ValueError.
        st.write("No numeric columns available to visualise.")
        return

    # Fall back to the first numeric column when the usual default is absent
    # (list.index would raise ValueError otherwise).
    if default_yaxis in numeric_cols:
        default_index = numeric_cols.index(default_yaxis)
    else:
        default_index = 0

    xaxis = st.sidebar.selectbox(label="x-axis", options=numeric_cols)
    yaxis = st.sidebar.selectbox(label="y-axis",
                                 options=numeric_cols,
                                 index=default_index)
    fig = px.bar(df, x=xaxis, y=yaxis)
    st.plotly_chart(fig)


def app():
    """Render the IPL player-profile page.

    Shows a sidebar text box for a cricketer's name. With no input a short
    hint is displayed; otherwise the name is resolved through ``find_name``
    and the profile returned by ``get_player_profile`` is shown as a table,
    optionally followed by a bar chart of two user-selected numeric columns.
    ``Footer()`` is called at the end in every case.
    """
    st.title("Welcome to IPL-cric-data!")
    st.sidebar.title('Find Player Profile')
    user_input_player = st.sidebar.text_input(
        label="Enter Cricketer's Name Eg. (Tendulkar)")

    if user_input_player:
        _show_player_profile(user_input_player)
    else:
        st.write(
            "You can try putting a cricket player's name in the left panel to see his profile as well as visualise the data."
        )

    # TODO: team profile section (sidebar select box over the IPL teams) is
    # not implemented yet.
    Footer()
st.markdown(
    'The majority of hospitals in NY are acute care, while the least are childrens hospitals. '
)

st.subheader('Hospital Ownership - NY')
ownership_ny = df_hospital_2[df_hospital_2['state'] == 'NY']
# Pin the column names explicitly: a bare value_counts().reset_index()
# produces ('index', 'hospital_ownership') on pandas < 2.0 but
# ('hospital_ownership', 'count') on pandas >= 2.0, which breaks the pie
# chart below. This form yields ('index', 'hospital_ownership') on both.
bar4 = (ownership_ny['hospital_ownership']
        .value_counts()
        .rename_axis('index')
        .reset_index(name='hospital_ownership'))
st.dataframe(bar4)
st.markdown(
    'The majority of hospitals in NY are Private non-profit voluntary, while the least are owned by the department of defense. '
)

st.subheader('With a PIE Chart:')
fig = px.pie(bar4, values='hospital_ownership', names='index')
st.plotly_chart(fig)

st.subheader('Map of NY Hospital Locations')
# Split the 'location' text on spaces into three parts after trimming outer
# parentheses; the second and third parts become the 'lon' and 'lat' columns.
# NOTE(review): presumably the values look like "POINT (<lon> <lat>)" --
# confirm against the dataset.
hospitals_ny_gps = hospitals_ny['location'].str.strip('()').str.split(
    ' ', expand=True).rename(columns={
        0: 'Point',
        1: 'lon',
        2: 'lat'
    })
# The opening parenthesis stays attached to the longitude after the split.
hospitals_ny_gps['lon'] = hospitals_ny_gps['lon'].str.strip('(')
hospitals_ny_gps = hospitals_ny_gps.dropna()
hospitals_ny_gps['lon'] = pd.to_numeric(hospitals_ny_gps['lon'])
hospitals_ny_gps['lat'] = pd.to_numeric(hospitals_ny_gps['lat'])
st.map(hospitals_ny_gps)
def monta_estados(taxa_mortalidade):
    """Render the per-state COVID dashboard.

    Loads the dataset via ``load_data_brasil_io``, lets the user pick a state
    in the sidebar, prints a textual summary of the row flagged ``is_last``
    for that state, draws a pydeck map of the state's cities and finally a
    line chart of confirmed cases and deaths over time.

    Args:
        taxa_mortalidade: Reference mortality rate in percent, used to
            estimate the number of infected people from the death count.
            The estimate is only shown when both this rate and the death
            count are positive.
    """
    df = load_data_brasil_io()
    states = df['state'].sort_values(ascending=True).unique()
    # unique() never returns None, so test for emptiness instead.
    if len(states) == 0:
        st.warning('Nenhum estado disponível nos dados carregados.')
        return

    state = st.sidebar.selectbox('Qual o estado você deseja visualizar?',
                                 states)
    do_estado = df['state'] == state
    dados_estado = df[do_estado & (df['place_type'] == 'state')]
    dados_estado_cities = df[do_estado & (df['place_type'] != 'state')]
    st.subheader(f"Dados de COVID em {state}")

    # ---- textual summary of the most recent state-level record ----
    hoje = dados_estado[dados_estado['is_last']]
    if hoje.empty:
        # Previously this fell through to a positional lookup and crashed.
        st.warning(f"Não há dados consolidados para {state}.")
        return

    dia_atual = hoje['date'].dt.strftime('%d-%m-%Y').iloc[0]
    confirmados = hoje['confirmed'].iloc[0]
    mortes = hoje['deaths'].iloc[0]
    # death_rate is scaled to a percentage with two decimals.
    taxa = round(hoje['death_rate'].iloc[0] * 10000) / 100
    st.markdown(
        f"O estado de **{state}** teve até o dia **{dia_atual}** "
        f"um total de **{confirmados}** casos confirmados e"
        f" **{mortes}** mortes com uma taxa de mortalidade de **{taxa}%**.")

    # Only estimate when the division is meaningful (avoids dividing by a
    # zero reference rate).
    if mortes > 0 and taxa_mortalidade > 0:
        quantidade_estimada = int(100 * mortes / taxa_mortalidade)
        st.markdown(
            f"Com base na taxa de mortalidade de outros países (**{taxa_mortalidade}%** dos infectados) "
            f"a quantidade estimada de infectados seria de **{quantidade_estimada}** para a quantidade de mortos atual."
        )

    # ---- map of the state's cities ----
    data_state = get_map_state(state)
    data_cities = get_map_city(state)
    view = get_view(state)
    slide = st.slider('Semana epidemiológica', 0, 255, 1)
    dia_atual_mapa = dados_estado_cities[dados_estado_cities['is_last']]
    st.write(dia_atual_mapa)

    # Attach the latest city record to each GeoJSON feature so the layer
    # expressions below can read `properties.dados.<column>`.
    for feature in data_cities['features']:
        id_city = feature['id']
        dados = dia_atual_mapa[dia_atual_mapa.city_ibge_code ==
                               id_city].reset_index().T.rename(
                                   columns={0: 'dados'})
        feature['properties'] = dados.to_dict()

    # Viewport: get_view() is read as (latitude, longitude).
    view_state = pdk.ViewState(longitude=view[1],
                               latitude=view[0],
                               zoom=6,
                               min_zoom=1,
                               max_zoom=60,
                               pitch=50,
                               bearing=0)

    # Flat outline of the whole state.
    geojson = pdk.Layer(
        'GeoJsonLayer',
        data_state,
        opacity=1,
        filled=True,
        get_fill_color=[255, 255, 255],
        get_line_color=[100, 100, 90],
    )

    # Extruded cities: height from deaths, fill colour from confirmed cases;
    # the slider value only feeds the line colour.
    geojson2 = pdk.Layer('GeoJsonLayer',
                         data_cities,
                         opacity=0.8,
                         stroked=False,
                         filled=True,
                         extruded=True,
                         wireframe=True,
                         get_elevation='properties.dados.deaths*1000',
                         get_fill_color='[255/2, properties.dados.confirmed , 255]',
                         get_line_color=[0, slide, 255],
                         pickable=True)

    # NOTE(review): the Mapbox token is hard-coded in source; consider moving
    # it to configuration/secrets.
    r = pdk.Deck(
        layers=[geojson, geojson2],
        tooltip={
            "html": f"<b>Color Value:</b> {state}",
            "style": {
                "color": "white"
            }
        },
        initial_view_state=view_state,
        height=800,
        width=800,
        map_style="mapbox://styles/mapbox/light-v9",
        mapbox_key='pk.eyJ1IjoidGVvcmlhIiwiYSI6ImNqODRpNWJrNjA5dGIyd3FoMnZ6am13NjcifQ.OgxGf081lfoKQAOhlYh1Tg'
    )
    st.pydeck_chart(r)

    # ---- time series of confirmed cases and deaths ----
    dados_estado_melt = pd.melt(dados_estado[['date', 'confirmed', 'deaths']],
                                id_vars=['date'],
                                value_vars=['confirmed', 'deaths'])
    serie = dados_estado_melt.groupby(["date",
                                       'variable']).sum().reset_index()
    fig = px.line(serie, x="date", y="value", color='variable')
    fig.update_layout(title=f'Casos de Covid em {state}',
                      xaxis_title='Data',
                      yaxis_title='Número de casos')
    st.plotly_chart(fig)


#
# """This app demonstrates the use of the awesome [deck.gl]() framework for visual
# exploratory data analysis of large datasets.
#
# Deck.gl is now (as of Streamlit v. 0.53) supported via the
# [`st.pydeck_chart`](https://docs.streamlit.io/api.html?highlight=pydeck#streamlit.pydeck_chart)
# function.
#
# We use data from the
# [Global Power Plant Database](http://datasets.wri.org/dataset/globalpowerplantdatabase) to
# illustrate the locations, fuel types and capacities of the worlds power plants.
# """ # # # import pathlib # # import pandas as pd # import pydeck as pdk # import streamlit as st # # POWER_PLANT_PATH = ( # pathlib.Path.cwd() / "gallery/global_power_plant_database/global_power_plant_database.csv" # ) # # POWER_PLANT_URL = ( # "https://raw.githubusercontent.com/MarcSkovMadsen/awesome-streamlit/master/" # "gallery/global_power_plant_database/global_power_plant_database.csv" # ) # # LATITUDE_COLUMN = "latitude" # LONGITUDE_COLUMN = "longitude" # # LOCATIONS = { # "Orsted Copenhagen HQ": {"latitude": 55.676098, "longitude": 12.568337}, # "Orsted Boston": {"latitude": 2.361145, "longitude": -71.057083}, # } # ORSTED_CPH_HQ = LOCATIONS["Orsted Copenhagen HQ"] # # FUEL_COLORS = { # "Oil": "black", # "Solar": "green", # "Gas": "black", # "Other": "gray", # "Hydro": "blue", # "Coal": "black", # "Petcoke": "black", # "Biomass": "green", # "Waste": "green", # "Cogeneration": "gray", # "Storage": "orange", # "Wind": "green", # } # # COLORS_R = {"black": 0, "green": 0, "blue": 0, "orange": 255, "gray": 128} # # COLORS_G = {"black": 0, "green": 128, "blue": 0, "orange": 165, "gray": 128} # # COLORS_B = {"black": 0, "green": 0, "blue": 255, "orange": 0, "gray": 128} # # # class ViewStateComponent: # """Component to let the user set the initial view state to for example Copenhagen or Boston""" # # def __init__(self): # self.latitude = ORSTED_CPH_HQ["latitude"] # self.longitude = ORSTED_CPH_HQ["longitude"] # self.zoom = 1 # self.pitch = 40.0 # # def edit_view(self): # """Lets the user edit the attributes""" # location = st.sidebar.selectbox("Location", options=list(LOCATIONS.keys()), index=0) # self.latitude = LOCATIONS[location]["latitude"] # self.longitude = LOCATIONS[location]["longitude"] # # self.zoom = st.sidebar.slider("Zoom", min_value=0, max_value=20, value=self.zoom) # self.pitch = st.sidebar.slider( # "Pitch", min_value=0.0, max_value=100.0, value=self.pitch, step=10.0 # ) # # @property # def view_state(self) -> pdk.ViewState: # """The ViewState 
according to the attributes # # Returns: # pdk.ViewState -- [description] # """ # return pdk.ViewState( # longitude=self.longitude, # latitude=self.latitude, # zoom=self.zoom, # min_zoom=0, # max_zoom=15, # pitch=self.pitch, # # bearing=-27.36, # ) # # # class GlobalPowerPlantDatabaseApp: # """The main app showing the Global Power Plant Database""" # # def __init__(self): # self.view_state_component = ViewStateComponent() # self.data = self.get_data() # self.show_data = False # # @staticmethod # @st.cache # def get_data() -> pd.DataFrame: # """The Global Power Plant data # # Returns: # pd.DataFrame -- The Global Power Plant data cleaned and transformed # """ # try: # data = pd.read_csv(POWER_PLANT_PATH) # except FileNotFoundError: # data = pd.read_csv(POWER_PLANT_URL) # # # Clean # data.primary_fuel = data.primary_fuel.fillna("NA") # data.capacity_mw = data.capacity_mw.fillna(1) # # # Transform # data["primary_fuel_color"] = data.primary_fuel.map(FUEL_COLORS) # data["primary_fuel_color"] = data["primary_fuel_color"].fillna("gray") # data["color_r"] = data["primary_fuel_color"].map(COLORS_R) # data["color_g"] = data["primary_fuel_color"].map(COLORS_G) # data["color_b"] = data["primary_fuel_color"].map(COLORS_B) # data["color_a"] = 140 # # return data[ # [ # "capacity_mw", # LATITUDE_COLUMN, # LONGITUDE_COLUMN, # "primary_fuel_color", # "color_r", # "color_g", # "color_b", # "color_a", # ] # ] # # def _scatter_plotter_layer(self): # return pdk.Layer( # "ScatterplotLayer", # data=self.data, # get_position=[LONGITUDE_COLUMN, LATITUDE_COLUMN], # get_fill_color="[color_r, color_g, color_b, color_a]", # get_radius="capacity_mw*10", # pickable=True, # opacity=0.8, # stroked=False, # filled=True, # wireframe=True, # ) # # def _deck(self): # return pdk.Deck( # map_style="mapbox://styles/mapbox/light-v9", # initial_view_state=self.view_state_component.view_state, # layers=[self._scatter_plotter_layer()], # tooltip={"html": "<b>Color Value:</b> {primary_fuel}", "style": 
{"color": "white"}}, # ) # # def view(self): # """Main view of the app""" # # self.view_state_component.edit_view() # Does not work # st.write(__doc__) # # st.pydeck_chart(self._deck()) # # st.write( # """The maps shows the power plant # # - **location** by latitude, longitude coordinates # - **fuel type** by color and # - **capacity in MW** by bubble size # """ # ) # st.json(FUEL_COLORS) # # st.write( # """Unfortunately **tooltips are not supported**. And there are also other issues. # See # # - [Issue 984](https://github.com/streamlit/streamlit/issues/984) # - [Issue 985](https://github.com/streamlit/streamlit/issues/985)""" # ) # # # APP = GlobalPowerPlantDatabaseApp() # APP.view()
def compute_class(data, target):
    """Train several classifiers and render a comparison report in Streamlit.

    The feature frame is stripped of string columns and of the target column,
    standardised, split 80/20 and (when more than two feature columns remain)
    reduced to two principal components. Five classifiers are fitted and the
    report shows: a preview of the data, a correlation heatmap, a metric
    table, one confusion matrix per model, ROC curves and a funnel chart
    ranking the models by accuracy.

    Args:
        data: pandas DataFrame holding the features (and possibly the target
            column).
        target: pandas Series with the class labels; its ``name`` is used to
            drop the target column from ``data``.
    """
    st.title("Classification Report")

    # ---- preprocessing ----
    data_head = data.copy(deep=True)  # untouched copy, used for the preview
    # Positional lookup so frames without a 0 index label work too.
    drop_str = [
        col for col in data.columns if isinstance(data[col].iloc[0], str)
    ]
    data = data.drop(drop_str, axis=1)
    # errors="ignore": a string-valued target column was already removed above.
    data = data.drop(target.name, axis=1, errors="ignore")

    use_pca = len(data.columns) > 2  # original carried a "FIX THIS!" note

    # ---- scaling and split ----
    # NOTE(review): the scaler is fitted before the split, so test rows
    # influence the scaling statistics; kept as in the original.
    x_scaled = StandardScaler().fit_transform(data)
    x_train, x_test, y_train, y_test = train_test_split(x_scaled,
                                                        target,
                                                        test_size=0.2,
                                                        random_state=177013)

    # ---- PCA only if more than two feature columns ----
    if use_pca:
        reducer = PCA(n_components=2)
        x_train = reducer.fit_transform(x_train)
        x_test = reducer.transform(x_test)

    # One fixed label order / positive class for every model, so confusion
    # matrices and ROC curves are comparable across models. Previously the
    # positive label was the first prediction of each model, which could
    # differ per model and flip the curve.
    classes = sorted(target.unique())
    positive_label = classes[-1]

    # ---- algorithms (naive Bayes left out by the original author) ----
    classification_models = {
        "LR": LogisticRegression(),
        "SVC": SVC(kernel="rbf"),
        "DTC": DecisionTreeClassifier(),
        "RFC": RandomForestClassifier(n_estimators=500),
        "XGBC": XGBClassifier(n_estimators=500)
    }
    metric_dict = {}
    accu_dict = {}
    for name, model in tqdm(classification_models.items()):
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        accuracy = accuracy_score(y_test, y_pred)
        # Hard predictions turned into a 0/1 score for the positive class;
        # the resulting ROC curve therefore has a single operating point.
        positive_score = (y_pred == positive_label).astype(int)
        metric_dict[name] = {
            # pos_label is ignored by scikit-learn for average="micro", so it
            # is no longer passed.
            "precision":
            round(precision_score(y_test, y_pred, average="micro"), 2),
            "recall":
            round(recall_score(y_test, y_pred, average="micro"), 2),
            "f1_score":
            round(f1_score(y_test, y_pred, average='micro'), 2),
            "accuracy":
            accuracy,
            "confusion":
            confusion_matrix(y_test, y_pred, labels=classes),
            "ROC_Vals":
            roc_curve(y_test, positive_score, pos_label=positive_label)
        }
        accu_dict[name] = accuracy

    metric_df = pd.DataFrame(metric_dict).drop(["confusion", "ROC_Vals"],
                                               axis=0)
    metric_df.reset_index(inplace=True)
    model_names = list(metric_dict)

    # ---- presentation: data preview ----
    st.header("Lets look at what we are dealing with ")
    st.dataframe(data_head.head())

    # ---- correlation plot ----
    st.header("Corelation Plot")
    st.markdown("zoom if intelligible")
    corr_val = data.corr()
    corr = ff.create_annotated_heatmap(y=corr_val.index.tolist(),
                                       x=corr_val.columns.tolist(),
                                       z=corr_val.values)
    for annotation in corr.layout.annotations:
        annotation.font.size = 8
        annotation.text = str(round(float(annotation.text), 4))
    corr.update_layout(width=800, height=800)
    st.plotly_chart(corr)

    # ---- metric table ----
    st.header("METRICS FOR CLASSIFICATION ALGORITHMS")
    table = ff.create_table(metric_df)
    table.update_layout(width=1350)
    st.plotly_chart(table)

    # ---- confusion matrices, one subplot per model ----
    st.markdown("### CONFUSION MATRICES")
    fig = make_subplots(rows=1,
                        cols=len(model_names),
                        shared_yaxes=True,
                        horizontal_spacing=0.05,
                        subplot_titles=model_names)
    # Axis labels follow the row/column order of confusion_matrix (rows are
    # true labels, columns are predicted labels, both in `classes` order).
    pred_labels = [f"{label}_pred" for label in classes]
    true_labels = [f"{label}_true" for label in classes]
    annotations = list(fig['layout']["annotations"])  # subplot titles
    for position, name in enumerate(model_names, start=1):
        confusion = metric_dict[name]["confusion"]
        heatmap = ff.create_annotated_heatmap(z=confusion,
                                              x=pred_labels,
                                              y=true_labels,
                                              annotation_text=confusion)
        fig.add_trace(heatmap.data[0], 1, position)
        # Re-anchor the cell annotations onto this subplot's axes.
        for annotation in heatmap.layout.annotations:
            annotation['xref'] = "x" + str(position)
            annotation['yref'] = 'y' + str(position)
            annotations.append(annotation)
    fig.update_layout(annotations=annotations, autosize=True, width=1350)
    st.plotly_chart(fig)

    # ---- ROC curves ----
    scatter_plot = go.Figure(
        go.Scatter(x=[0, 1], y=[0, 1], mode="lines", name="ref"))
    for name in model_names:
        fpr, tpr, _ = metric_dict[name]["ROC_Vals"]
        AUC_val = auc(fpr.tolist(), tpr.tolist())
        scatter_plot.add_trace(
            go.Scatter(x=fpr.tolist(),
                       y=tpr.tolist(),
                       name=f"{name} - AUC val - {AUC_val:.2f}"))
    scatter_plot.update_layout(width=1300, height=500)
    st.header("ROC_curves")
    st.plotly_chart(scatter_plot)

    # ---- funnel chart: models ranked by accuracy ----
    st.header("Recommendations")
    st.markdown(
        "the percent below classifier represents recommended probability for classifier"
    )
    accu_dict = dict(
        sorted(accu_dict.items(), key=lambda item: item[1], reverse=True))
    funnel = go.Figure(
        go.Funnelarea(values=list(accu_dict.values()),
                      text=list(accu_dict.keys())))
    funnel.update_layout(showlegend=False)
    st.plotly_chart(funnel)
key='1') sentiment_count = data['airline_sentiment'].value_counts() sentiment_count = pd.DataFrame({ 'Sentiment': sentiment_count.index, 'Tweets': sentiment_count.values }) if not st.sidebar.checkbox('Hide', True): st.markdown('### Number of Tweets by Sentiment') if select == 'Histogram': fig = ps.bar(sentiment_count, x='Sentiment', y='Tweets', color='Tweets', height=500) st.plotly_chart(fig) else: fig = ps.pie(sentiment_count, names='Sentiment', values='Tweets', color='Tweets', height=500) st.plotly_chart(fig) #plotting interactive map st.sidebar.subheader('When and where are users tweeeting from?') hour = st.sidebar.slider('Hour of the day', 0, 23) hour = st.sidebar.number_input('Hour of the day', min_value=1, max_value=24) modified_data = data[data['tweet_created'].dt.hour == hour] if not st.sidebar.checkbox('Close', True):
def compute_reg(data, target):
    """Train several regressors and render a comparison report in Streamlit.

    The feature frame is stripped of string columns and of the target column;
    features and target are standardised, split 80/20 and (when more than two
    feature columns remain) the features are reduced to two principal
    components. Five regressors are fitted and the report shows: a preview of
    the data, a correlation heatmap, a metric table and a radar plot of the
    metrics (without Max_error) per model.

    Args:
        data: pandas DataFrame holding the features (and possibly the target
            column).
        target: pandas Series with the regression target; its ``name`` is
            used to drop the target column from ``data``.
    """
    st.title("Regression Report")

    # ---- preprocessing ----
    data_head = data.copy(deep=True)  # untouched copy, used for the preview
    # Positional lookup so frames without a 0 index label work too.
    drop_str = [
        col for col in data.columns if isinstance(data[col].iloc[0], str)
    ]
    data = data.drop(drop_str, axis=1)
    # errors="ignore": a string-valued target column was already removed above.
    data = data.drop(target.name, axis=1, errors="ignore")
    # Restrict to numeric/bool columns explicitly: DataFrame.corr() no longer
    # drops non-numeric columns silently on pandas >= 2.0 and raises instead.
    corr_mat = data_head.select_dtypes(include=["number", "bool"]).corr()

    use_pca = len(data.columns) > 2  # original carried a "FIX THIS!" note

    # ---- feature scaling ----
    # NOTE(review): both scalers are fitted before the split, so test rows
    # influence the scaling statistics; kept as in the original.
    x_scaled = StandardScaler().fit_transform(data)
    y_scaled = StandardScaler().fit_transform(
        np.array(target).reshape(-1, 1))
    x_train, x_test, y_train, y_test = train_test_split(x_scaled,
                                                        y_scaled,
                                                        test_size=0.2,
                                                        random_state=177013)

    # ---- PCA only if more than two feature columns ----
    if use_pca:
        reducer = PCA(n_components=2)
        x_train = reducer.fit_transform(x_train)
        x_test = reducer.transform(x_test)

    # ---- model building (polynomial regression left out by the author) ----
    regression_models = {
        "LINEAR_REG": LinearRegression(),
        "SVR": SVR(),
        "DTR": DecisionTreeRegressor(),
        "RFR": RandomForestRegressor(n_estimators=400),
        "XGBR": GradientBoostingRegressor(n_estimators=400)
    }
    metric_dict = {}
    for name, model in tqdm(regression_models.items()):
        model.fit(x_train, y_train.ravel())
        y_pred = model.predict(x_test)
        mse = mean_squared_error(y_test, y_pred)
        metric_dict[name] = {
            "Max_error": round(max_error(y_test, y_pred), 5),
            "MAE": round(mean_absolute_error(y_test, y_pred), 3),
            "MSE": round(mse, 3),
            "R2-score": round(r2_score(y_test, y_pred), 5),
            # Square root taken directly: the `squared=False` flag of
            # mean_squared_error is deprecated in recent scikit-learn.
            "RMSE": round(mse**0.5, 3),
            "MAPE": round(mean_absolute_percentage_error(y_test, y_pred), 3)
        }
    metric_df = pd.DataFrame(metric_dict)
    metric_df.reset_index(inplace=True)

    # ---- presentation: data preview ----
    st.header("Lets look at what we are dealing with ")
    st.dataframe(data_head.head())

    # ---- correlation plot ----
    st.header("Corelation Plot")
    corr = ff.create_annotated_heatmap(y=corr_mat.index.tolist(),
                                       x=corr_mat.columns.tolist(),
                                       z=corr_mat.values)
    for annotation in corr.layout.annotations:
        annotation.font.size = 8
        annotation.text = str(round(float(annotation.text), 4))
    corr.update_layout(width=800, height=800)
    st.plotly_chart(corr)

    # ---- metric table ----
    st.header("METRICS FOR REGRESSION ALGORITHMS")
    table = ff.create_table(metric_df)
    table.update_layout(width=1350)
    st.plotly_chart(table)
    st.markdown(
        "MAPE does not represent the output as a percentage in range [0, 100]. Instead, it represents in range [0, 1/eps]."
    )

    # ---- radar plot: one trace per model ----
    radar = go.Figure()
    # Max_error is left out of the radar plot (it was row 0 of the table).
    radar_df = metric_df[metric_df["index"] != "Max_error"]
    metric_names = radar_df["index"].tolist()
    for model_name in radar_df.columns[1:].values:
        radar.add_trace(
            go.Scatterpolar(r=radar_df[model_name].tolist(),
                            theta=metric_names,
                            fill='toself',
                            name=model_name))
    radar.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 2])),
        showlegend=True,
        title="Radar Plot! (use legend to disable individual algorithms)",
        width=800,
        height=650)
    st.plotly_chart(radar)
def write(): """Used to write the page in the app.py file""" with st.spinner("Loading Map ..."): # read CSV # CSV for Choropleth Map df = pd.read_csv( "https://raw.githubusercontent.com/hannahkruck/visuasyl/master/src/datasets/Map.csv", encoding="utf8", sep=";") # CSV for Line Map df2 = pd.read_csv( "https://raw.githubusercontent.com/hannahkruck/visuasyl/master/src/datasets/Map.csv", encoding="utf8", sep=";") # Title st.title("Map view") #----------------- Side bar (filter options) ------------------- # Select map (Choropleth or Line Map) selectedMapType = st.sidebar.radio("Map", ('Choropleth Map', 'Line Map')) if selectedMapType == 'Choropleth Map': showChoropleth = True showLine = False else: showLine = True showChoropleth = False # General filter (Age, Gender) st.sidebar.header("Filters") selectedAge = st.sidebar.multiselect( "Select Age", ("under 18", "18 - 34", "35 - 64", "over 65")) selectedGender = st.sidebar.selectbox("Select Gender", ("All", "Male", "Female")) # --- Special filter for Choropleth Map -- st.sidebar.header("Filter for Choropleth Map") # Drop down menu for Choropleth Map Information selectedMapChoropleth = st.sidebar.selectbox( "Select Map Information", ('Applications to target countries', 'Applicants by country of origin')) # Information for Choropleth Map based on the chosen map information if 'target' in selectedMapChoropleth: selectedMapChoropleth = 'destinationCountry' selectedCode = 'geoCodeDC' mapColor = 'Blues' else: selectedMapChoropleth = 'homeCountry' selectedCode = 'geoCodeHC' mapColor = 'Reds' # --- Special filter for Line Map --- st.sidebar.header("Filter for Line Map") # Select type (show routes of asylum seeker from a particular origin country or to a particular target country) selectedType = st.sidebar.radio("Select type", ('Target country', 'Origin country')) if selectedType == 'Target country': selectedType = df.destinationCountry.unique() countryCategory = 'destinationCountry' namesToShow = 'homeCountry' selectedLon = 
'lonDC' selectedLat = 'latDC' else: selectedType = df.homeCountry.unique() countryCategory = 'homeCountry' namesToShow = 'destinationCountry' selectedLon = 'lonHC' selectedLat = 'latHC' # Drop down menu for selected country selectedCountryMapLine = st.sidebar.selectbox("Select country", (selectedType)) #----------------- Website content (Year slider, i-Button) ------------------- # --- Markdown for Info icon --- # CSS and HTML Code st.markdown(''' <!-- https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_transition & https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_right--> <style> .tooltip { position: relative; display: inline-block; font-size:1.6rem; } .tooltip .tooltiptext { visibility: hidden; width: 50vw; background-color: #f1f3f7; color: #262730; text-align: justify; border-radius: 6px; padding: 5px; font-size:0.9rem; /* Position the tooltip */ position: absolute; z-index: 1; top: -5px; left: 105%; opacity: 0; transition: opacity 0.8s; } .tooltip:hover .tooltiptext { visibility: visible; opacity: 1; } </style> ''', unsafe_allow_html=True) # Text for tooltip st.markdown(''' <div class="tooltip">ⓘ <span class="tooltiptext"> <b>Choropleth Map</b><br>The Choropleth Map shows the number of asylum applications per country in Europe and the number of refugees per country worldwide for the selected year (see filter 'Select Map Information' for Choropleth Map). <br><br> <b>Line Map</b><br>The Line Map presents the routes of the refugees depending on the selected type. The type 'target country' shows from which countries the asylum seekers originate based on a specific target country. The type 'origin country' indicates where the asylum seekers are fleeing to from a specific country of origin. <br><br> <b>Colour gradient</b><br> It should be noted here that the colour gradient adjusts to the maximum and minimum value, i.e. the colour changes with each filtering. 
</span></div> ''', unsafe_allow_html=True) # Slider to choose the year selected_year = st.slider("", (int(df["year"].min())), (int(df["year"].max()))) # Title for map regarding the chosen year st.subheader('Asylum seekers in the year %s' % selected_year) #----------------- Data preparation (general) ------------------- # Remove 'overall' and 'Überseeische Länder und Hoheitsgebiet' for both CSV indexNames = df[df['destinationCountry'] == 'Overall'].index df.drop(indexNames, inplace=True) indexNames = df[df['homeCountry'] == 'Overall'].index df.drop(indexNames, inplace=True) indexNames = df[df['destinationCountry'] == 'Überseeische Länder und Hoheitsgebiete'].index df.drop(indexNames, inplace=True) indexNames = df[df['homeCountry'] == 'Überseeische Länder und Hoheitsgebiete'].index df.drop(indexNames, inplace=True) indexNames = df2[df2['destinationCountry'] == 'Overall'].index df2.drop(indexNames, inplace=True) indexNames = df2[df2['homeCountry'] == 'Overall'].index df2.drop(indexNames, inplace=True) indexNames = df2[df2['destinationCountry'] == 'Überseeische Länder und Hoheitsgebiete'].index df2.drop(indexNames, inplace=True) indexNames = df2[df2['homeCountry'] == 'Überseeische Länder und Hoheitsgebiete'].index df2.drop(indexNames, inplace=True) # Delete all cells, except one year (both maps) indexNames = df[df['year'] != selected_year].index df.drop(indexNames, inplace=True) indexNames = df2[df2['year'] != selected_year].index df2.drop(indexNames, inplace=True) #----------------- Data preparation (Choropleth Map) ------------------- # Information for Choropleth Map (df) based on the chosen gender and age df['subtotal'] = 0 # Check selected gender if selectedGender == 'Female': # if an age is selected if selectedAge: # selectedAge is a list of strings # Therefore, we have to check every entry in the list and sum up partial results in new column subtotal for i in selectedAge: if i == 'under 18': df['subtotal'] = df['subtotal'] + df['fu18'] elif i == '18 - 34': 
df['subtotal'] = df['subtotal'] + df['f18'] elif i == '35 - 64': df['subtotal'] = df['subtotal'] + df['f35'] elif i == 'over 65': df['subtotal'] = df['subtotal'] + df['fo65'] else: # no age is selected, that means the user wants to see all women df['subtotal'] = df['subtotal'] + df['womenTotal'] a = 'subtotal' elif selectedGender == 'Male': if selectedAge: for i in selectedAge: if i == 'under 18': df['subtotal'] = df['subtotal'] + df['mu18'] elif i == '18 - 34': df['subtotal'] = df['subtotal'] + df['m18'] elif i == '35 - 64': df['subtotal'] = df['subtotal'] + df['m35'] elif i == 'over 65': df['subtotal'] = df['subtotal'] + df['mo65'] else: df['subtotal'] = df['subtotal'] + df['menTotal'] a = 'subtotal' else: # if no gender is selected, that means the user wants to see all if selectedAge: for i in selectedAge: if i == 'under 18': df['subtotal'] = df['subtotal'] + df['mu18'] + df[ 'fu18'] elif i == '18 - 34': df['subtotal'] = df['subtotal'] + df['m18'] + df['f18'] elif i == '35 - 64': df['subtotal'] = df['subtotal'] + df['m35'] + df['f35'] elif i == 'over 65': df['subtotal'] = df['subtotal'] + df['fo65'] + df[ 'mo65'] a = 'subtotal' else: a = 'total' # Group the countries by year and sum up the number (total) in a new column sum (df['sum'] df['sum'] = df.groupby([selectedMapChoropleth, 'year'])[a].transform('sum') #----------------- Data preparation (Line Map) ------------------- # countryCategory = homeCountry or destinationCountry # selectedCountryMapLine is the selected country for the map line (for example Syria (homeCountry)) indexNames = df2[df2[countryCategory] != selectedCountryMapLine].index df2.drop(indexNames, inplace=True) df2['subtotal'] = 0 if selectedGender == 'Female': # if an age is selected if selectedAge: # selectedAge is a list of strings # Therefore, we have to check every entry in the list and delete the row if the value in the column for the age is null for i in selectedAge: if i == 'under 18': indexNames = df2[df2['fu18'] == 0].index 
df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['fu18'] elif i == '18 - 34': indexNames = df2[df2['f18'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['f18'] elif i == '35 - 64': indexNames = df2[df2['f35'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['f35'] elif i == 'over 65': indexNames = df2[df2['fo65'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['fo65'] else: indexNames = df2[df2['womenTotal'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['womenTotal'] elif selectedGender == 'Male': if selectedAge: # selectedAge is a list of strings # Therefore, we have to check every entry in the list and delete the row if the value in the column for the age is null for i in selectedAge: if i == 'under 18': indexNames = df2[df2['mu18'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['mu18'] elif i == '18 - 34': indexNames = df2[df2['m18'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['m18'] elif i == '35 - 64': indexNames = df2[df2['m35'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['m35'] elif i == 'over 65': indexNames = df2[df2['mo65'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['mo65'] else: indexNames = df2[df2['menTotal'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['menTotal'] else: # if no gender is selected, that means the user wants to see all if selectedAge: for i in selectedAge: if i == 'under 18': indexNames = df2[df2['mu18'] == 0].index df2.drop(indexNames, inplace=True) indexNames = df2[df2['fu18'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['mu18'] + df2[ 'fu18'] elif i == '18 - 34': indexNames = df2[df2['m18'] == 0].index 
df2.drop(indexNames, inplace=True) indexNames = df2[df2['f18'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['m18'] + df2[ 'f18'] elif i == '35 - 64': indexNames = df2[df2['m35'] == 0].index df2.drop(indexNames, inplace=True) indexNames = df2[df2['f35'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['m35'] + df2[ 'f35'] elif i == 'over 65': indexNames = df2[df2['mo65'] == 0].index df2.drop(indexNames, inplace=True) indexNames = df2[df2['fo65'] == 0].index df2.drop(indexNames, inplace=True) df2['subtotal'] = df2['subtotal'] + df2['mo65'] + df2[ 'fo65'] else: # all people are considered indexNames = df2[df2['total'] == 0].index df2.drop(indexNames, inplace=True) # Create list of origin or target countries to display them in hover text # Every second index must contain the country name, so a placeholder is necessary in front of it # Structur: [placeholder,name+number,placeholder,name+number,...] # name = listPlaceholderNames # number = listPlaceholderNumber listPlaceholderNames = df2[namesToShow].values.tolist() listPlaceholderNumber = df2[a].values.tolist() nameList = [] i = 0 if namesToShow == 'homeCountry': for x in listPlaceholderNames: nameList.append(i) x = x + ': ' + str(listPlaceholderNumber[i]) nameList.append(x) i = i + 1 if len(nameList) != 0: nameList[-2] = None else: for x in listPlaceholderNames: x = x + ': ' + str(listPlaceholderNumber[i]) nameList.append(x) nameList.append(i) i = i + 1 if len(nameList) != 0: nameList[-1] = None st.write( '<style>div.Widget.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True) #----------------Create Maps with Plotly (Choropleth and Line Map)--------------------------- fig = go.Figure() # Choropleth Map fig.add_trace( go.Choropleth( locations=df[selectedCode], visible=showChoropleth, z=df['sum'], text=df[selectedMapChoropleth], colorscale=mapColor, autocolorscale=False, reversescale=False, name="", 
marker_line_color='darkgray', marker_line_width=0.5, colorbar_tickprefix='', colorbar_title='Number of<br>asylum<br>applications<br>', )) #--------- Line Map -------------- # Set selected country fig.add_trace( go.Scattergeo( locationmode='country names', lon=df2[selectedLon], lat=df2[selectedLat], hoverinfo='text', name=selectedCountryMapLine, text=df2[countryCategory], line=dict(width=1, color='red'), opacity=0.510, visible=showLine, mode='markers', )) # NumPy Array Slicing # Longitude and Latitude lons = [] lats = [] lons = np.empty(2 * len(df2)) lons[::2] = df2['lonDC'] lons[1::2] = df2['lonHC'] lats = np.empty(2 * len(df2)) lats[::2] = df2['latDC'] lats[1::2] = df2['latHC'] # Set lines fig.add_trace( go.Scattergeo(locationmode='country names', visible=showLine, name='route and number <br>of asylum seekers', hovertemplate=nameList, lon=lons, lat=lats, mode='markers+lines', line=dict(width=1, color='red'), opacity=0.5)) # Update layout choropleth map fig.update_layout( showlegend=True, geo=go.layout.Geo( scope='world', #projection_type = 'azimuthal equal area', showland=True, showcountries=True, landcolor='rgb(243, 243, 243)', countrycolor='rgb(105,105,105)', ), ) # Update layout line map fig.update_layout( geo=dict(showframe=False, showcoastlines=False, projection_type='equirectangular'), autosize=True, margin=dict( l=0, r=0, b=0, t=20, ), ) # Display figure st.plotly_chart( fig, use_container_width=True, config={ 'modeBarButtonsToRemove': ['lasso2d', 'select2d', 'pan2d', 'hoverClosestGeo'] })
# Cache for loading data: st.cache memoises the download per ticker.
@st.cache
def load_data(ticker):
    """Download the price history for ``ticker`` from START to TODAY.

    Args:
        ticker: Symbol passed straight through to ``yf.download``.

    Returns:
        The downloaded DataFrame with its index reset, so the former
        index becomes a regular column (read below as ``'Date'``).
    """
    data = yf.download(ticker, START, TODAY)
    data.reset_index(inplace=True)
    return data


data_load_state = st.text("Loading Data...")
data = load_data(selected_stock)
# Overwrite the placeholder so the page shows that loading has finished;
# previously the same "Loading Data..." text was written again.
data_load_state.text("Loading Data... done!")

# Prediction of prices using Prophet.
# The frame is reduced to the date and closing price, renamed to the
# 'ds' / 'y' column names before fitting.
df_train = data[['Date', 'Close']].rename(columns={"Date": "ds", "Close": 'y'})

m = Prophet()
m.fit(df_train)
future = m.make_future_dataframe(periods=period)
forecast = m.predict(future)

st.write(f'Forecast plot for {n_years} years')
fig1 = plot_plotly(m, forecast)
st.plotly_chart(fig1)

# Extra components graphs
st.write("COMPONENTS")
fig2 = m.plot_components(forecast)
st.write(fig2)
# Render the demo chart that matches the selected option.
if option == "Pyplot":
    # Histogram of 100 samples drawn from a normal distribution (mean 1, sd 1)
    array = np.random.normal(1, 1, size=100)
    # Draw on an explicit figure and hand it to Streamlit: calling
    # st.pyplot() without a figure relies on matplotlib's global state.
    hist_fig, hist_ax = plt.subplots()
    hist_ax.hist(array, bins=20)
    st.pyplot(hist_fig)
elif option == "Plotly Chart":
    # Three normal samples of 200 points each, shifted by -2, 0 and +2
    x1 = np.random.randn(200) - 2
    x2 = np.random.randn(200)
    x3 = np.random.randn(200) + 2
    hist_data = [x1, x2, x3]
    group_labels = ['Group 1', 'Group 2', 'Group 3']
    fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5])
    st.plotly_chart(fig, use_container_width=True)
elif option == "Graphviz Chart":
    graph = graphviz.Digraph()
    # Directed edges as (tail, head) pairs, added in this order
    edges = (
        ('run', 'intr'),
        ('intr', 'runbl'),
        ('runbl', 'run'),
        ('run', 'kernel'),
        ('kernel', 'zombie'),
        ('kernel', 'sleep'),
        ('kernel', 'runmem'),
        ('sleep', 'swap'),
        ('swap', 'runswap'),
        ('runswap', 'new'),
        ('new', 'runmem'),
        ('sleep', 'runmem'),
    )
    for tail, head in edges:
        graph.edge(tail, head)
    # The graph was previously built but never displayed
    st.graphviz_chart(graph)
def main():
    """Render the Top2Vec clustering page.

    Sections are written top to bottom: the output of
    ``wc(complaint_words())``, a scatter of the document and topic
    vectors, one scatter per clustering (k-means, DBSCAN) followed by
    its top words, and a keyword search over the loaded model.
    """
    st.title("clustering using the top2vec")

    st.subheader("top words on complaint")
    st.write(wc(complaint_words()))

    st.subheader("tweet trends")
    st.write("this dataset based on tweets that has keyword 'koinworks'")

    st.subheader("visualization of the dataset")
    st.markdown("#### doc2vec")
    vectors, topic_vectors, model = load_vectors()

    # One figure with two marker traces; only the first two components of
    # every vector are plotted.
    fig = go.Figure()
    for points in (vectors, topic_vectors):
        fig.add_trace(
            go.Scatter(
                x=[point[0] for point in points],
                y=[point[1] for point in points],
                mode="markers",
            )
        )
    st.plotly_chart(fig)

    C = _cluster()
    plot_df = C.plot_df()
    k_top, d_top = C.top_words()

    def cluster_figure(label_column):
        """Scatter plot_df's x/y points, coloured by ``label_column``."""
        figure = go.Figure()
        figure.add_trace(
            go.Scatter(
                x=plot_df["x"],
                y=plot_df["y"],
                marker_color=plot_df[label_column],
                mode="markers",
            )
        )
        return figure

    st.markdown("#### kmeans")
    fig = cluster_figure("kmeans_label")
    st.plotly_chart(fig)
    st.write(k_top)

    st.markdown("#### dbscan")
    fig = cluster_figure("dbscan_label")
    st.plotly_chart(fig)
    st.write(d_top)

    st.subheader("search tweets")
    query = st.text_input("keyword")
    result = ""
    # Test the split keywords for emptiness instead of comparing the string
    # by identity (`is not ""`); this also skips whitespace-only input,
    # which would otherwise search with an empty keyword list.
    keywords = query.split()
    if keywords:
        try:
            result = model.search_documents_by_keywords(keywords, 50)
        except ValueError:
            # result keeps its "" placeholder in this case
            st.write("no tweets detected, maybe try another keyword")
        # NOTE(review): the table is rendered only after a keyword was
        # entered - confirm this matches the intended layout.
        s = df_wrapper(result)
        st.dataframe(s, width=1000)

    st.subheader("similar tweets by distance")