def run():
    """Render the saree-offer Streamlit page.

    "Online" mode collects one saree's attributes through widgets and
    scores it with the module-level ``model``; "Batch" mode scores every
    row of an uploaded CSV file and prints the resulting frame.
    """
    from PIL import Image

    banner = Image.open('saree image.jpg')
    sidebar_picture = Image.open('saree new image.jpg')

    st.image(banner, use_column_width=True)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Batch"))
    st.sidebar.info('This app is created to predict OFFER of sarees ')
    st.sidebar.success('https://www.pycaret.org')
    st.sidebar.image(sidebar_picture)
    st.title("Predicting OFFER of sarees")

    if add_selectbox == 'Online':
        # Dict values are evaluated top to bottom, so the widgets appear on
        # the page in exactly this order.
        input_row = {
            'product_name': st.selectbox('product_name', [
                'other', 'Printed Daily Wear', 'Woven', 'Solid Fashion',
                'Self Design'
            ]),
            'brand_name': st.selectbox(
                'brand_name',
                ['other', 'SAARA', 'Ad SAREES', 'KARA', 'VeBNoR']),
            'item_name': st.selectbox('item_name', [
                'Printed Daily Wear', 'Embroidered Kanjivaram',
                'Solid Fashion Cotton', 'Banarasi Art Silk', 'other'
            ]),
            'Type': st.selectbox(
                'Type', ['Regular Sari', 'other', 'Unstitched', 'Bollywood']),
            'discount_price': st.number_input('discount_price',
                                              min_value=0.1,
                                              max_value=10000.0,
                                              value=0.1),
            'orginal_price': st.number_input('orginal_price',
                                             min_value=0.1,
                                             max_value=10000.0,
                                             value=0.1),
            'secondary_colour': st.selectbox(
                'secondary_colour',
                ['other colour', 'Casual', 'Pink', 'Gold', 'Multicolor']),
            'fashion': st.selectbox(
                'fashion',
                ['Printed', 'other', 'Woven', 'Embroidered', 'Solid']),
            'rating': st.number_input('rating',
                                      min_value=3.0,
                                      max_value=5.0,
                                      value=3.0),
            'Trend_place': st.selectbox('Trend_place', [
                'Wedding', 'Casual', 'other', 'Dry Clean Only ',
                'Machine Wash'
            ]),
        }
        input_df = pd.DataFrame([input_row])

        # Stays empty until the button is pressed during this rerun.
        message = ""
        if st.button(" PREDICT OFFER"):
            label = str(predict(model=model, input_df=input_df))
            message = ("YOU WILL GET ABOVE 75% OFFER" if label == '0' else
                       "YOU WILL GET 75 AND LESSER PERCENT OFFER")
        st.success(f'The Prediction -- {message}')

    elif add_selectbox == 'Batch':
        uploaded_csv = st.file_uploader("Upload csv file for predictions",
                                        type=["csv"])
        if uploaded_csv is not None:
            batch_frame = pd.read_csv(uploaded_csv)
            st.write(predict_model(estimator=model, data=batch_frame))
def run():
    """Render the employee-churn Streamlit page.

    "Single" mode reads one employee's features from sidebar widgets and
    writes the predicted label; "Batch" mode scores an uploaded CSV file,
    shows the first rows of the result and, when the upload carries the
    ground-truth ``left`` column, the accuracy on that data.
    """
    from PIL import Image

    # The JPEG is re-encoded as an RGB PNG on disk and then loaded from there.
    Image.open('../image/kate-sade-unsplash.jpg').convert('RGB').save(
        '../image/employee_churn.png')
    image_churn = Image.open('../image/employee_churn.png')

    add_selectbox = st.sidebar.selectbox(
        "How would you like to input features?", ("Single", "Batch"))
    st.sidebar.info(
        'This app is created by Gilbert Langat to predict employees churn in organization XYZ. Data used is app is obtained from [Kaggle](https://www.kaggle.com/arvindbhatt/hrcsv)'
    )
    st.sidebar.success('https://www.pycaret.org')

    st.title("Employee Churn Prediction App")
    st.image(image_churn, )
    st.write(
        " Photo by [Kate.sade](https://unsplash.com/photos/2zZp12ChxhU) on"
        " Unsplash. The model outputs 0 meaning the employee stays with the"
        " company and 1 means employee left. ")

    if add_selectbox == 'Single':
        satisfaction_level = st.sidebar.number_input('Satisfaction Level',
                                                     min_value=0.1,
                                                     max_value=1.0,
                                                     value=0.5)
        last_evaluation = st.sidebar.number_input('Last Evaluation',
                                                  min_value=1,
                                                  max_value=100,
                                                  value=25)
        number_project = st.sidebar.number_input('Projects',
                                                 min_value=1,
                                                 max_value=50,
                                                 value=10)
        average_montly_hours = st.sidebar.number_input(
            'Average Monthly Hours', min_value=50, max_value=400, value=200)
        time_spend_company = st.sidebar.number_input('Time Spent',
                                                     min_value=1,
                                                     max_value=30,
                                                     value=10)
        Work_accident = st.sidebar.selectbox('Work Accident', [0, 1])
        promotion_last_5years = st.sidebar.selectbox('Promotion', [0, 1])
        dept = st.sidebar.selectbox('Department', [
            'accounting', 'hr', 'IT', 'management', 'marketing',
            'product_mng', 'RandD', 'sales', 'support', 'technical'
        ])
        salary = st.sidebar.selectbox('Salary', ['high', 'low', 'medium'])

        output = ""
        input_dict = {
            'satisfaction_level': satisfaction_level,
            'last_evaluation': last_evaluation,
            'number_project': number_project,
            'average_montly_hours': average_montly_hours,
            'time_spend_company': time_spend_company,
            'Work_accident': Work_accident,
            'promotion_last_5years': promotion_last_5years,
            'dept': dept,
            'salary': salary,
        }
        input_df = pd.DataFrame([input_dict])

        if st.sidebar.button("Predict"):
            output = predict(model=model, input_df=input_df)
            output = 'Label = ' + str(output)

        st.subheader("Model Prediction")
        st.write(output)

    if add_selectbox == 'Batch':
        file_upload = st.sidebar.file_uploader(
            "Upload csv file for predictions", type=["csv"])

        if file_upload is not None:
            data = pd.read_csv(file_upload)
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions.head())

            # Accuracy needs the true outcome. An unlabeled upload used to
            # raise KeyError on 'left' after the predictions were shown.
            if 'left' in predictions.columns:
                st.subheader("Model Accuracy on the batch data")
                st.write(
                    check_metric(predictions['left'],
                                 predictions['Label'],
                                 metric='Accuracy'))
            else:
                st.info("The uploaded file has no 'left' column, so accuracy "
                        "cannot be computed.")
def predict(model, input_df):
    """Score ``input_df`` with ``model`` and return the first row's label.

    :param model: Estimator/pipeline handed to ``predict_model``.
    :param input_df: DataFrame of feature rows to score.
    :return: The ``Label`` value of the first scored row.
    """
    predictions_df = predict_model(estimator=model, data=input_df)
    # Positional access: ``['Label'][0]`` is a label lookup that raises
    # KeyError when the index has no label 0 (e.g. a sliced frame) and relies
    # on a deprecated positional fallback for non-integer indexes.
    return predictions_df['Label'].iloc[0]
# Saved PyCaret pipelines per (target, method):
# (model used for the live prediction, model used for the test-data scores).
_AUTOML_MODEL_NAMES = {
    ('KOSPI', 'AutoML_CLA'): ('deployment_kospi_20201029',
                              'deployment_kospi_train_20201029'),
    ('KOSPI', 'AutoML_REG'): ('deployment_kospi_reg_20201029',
                              'deployment_kospi_reg_train_20201029'),
    ('YG', 'AutoML_CLA'): ('deployment_yg_20201029',
                           'deployment_yg_train_20201029'),
    ('YG', 'AutoML_REG'): ('deployment_yg_reg_20201029',
                           'deployment_yg_reg_train_20201029'),
}

# Row position at which the slice used for the "Test Data" scores starts.
_TEST_START_ROW = {'KOSPI': 382, 'YG': 341}

_BUY_MESSAGE = "주가 상승 예상 -> 매매 어드바이스 : 매수"
_SELL_MESSAGE = "주가 하락 예상 -> 매매 어드바이스 : 매도"

# Explicit Prophet changepoints used for the YG model only.
_YG_CHANGEPOINTS = [
    '2018-10-29', '2018-11-19', '2018-12-11', '2019-01-04', '2019-01-29',
    '2019-02-22', '2019-03-19', '2019-04-10', '2019-05-03', '2019-05-27',
    '2019-06-19', '2019-07-10', '2019-08-01', '2019-08-26', '2019-09-20',
    '2019-10-15', '2019-11-07', '2019-11-29', '2019-12-26', '2020-01-20',
    '2020-02-13', '2020-03-05', '2020-03-30', '2020-04-21', '2020-05-18',
    '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-18', '2020-08-19',
    '2020-08-20', '2020-08-26', '2020-08-28', '2020-08-31', '2020-09-02',
    '2020-09-03', '2020-09-07', '2020-09-08',
]


def _advice(date, signal):
    """Return the date-prefixed buy message for signal 1, else the sell one."""
    # Normalise to str so an integer label 1 and a string '1' both mean "buy".
    return date + (_BUY_MESSAGE if str(signal) == '1' else _SELL_MESSAGE)


def _show_classification_scores(model_train, test_frame):
    """Score ``test_frame`` and show ACC/AUC/RECALL/PREC/F1 as a table."""
    scored = predict_model(model_train, data=test_frame)
    test_model = scored[['Labeling', 'Label']]
    actual, predicted = test_model['Labeling'], test_model['Label']
    data = {
        'ACC': [accuracy_score(actual, predicted)],
        'AUC': [roc_auc_score(actual, predicted)],
        'RECALL': [recall_score(actual, predicted)],
        'PREC': [precision_score(actual, predicted)],
        'F1': [f1_score(actual, predicted)],
    }
    score_model = pd.DataFrame(
        data=data, columns=['ACC', 'AUC', 'RECALL', 'PREC', 'F1'])
    score_model.index.name = "Metrics Score"
    st.write("Test Data Metrics Score")
    st.table(score_model)


def _show_regression_scores(model_train, test_frame):
    """Score ``test_frame`` and show MAE/MSE/RMSE/R2 plus a line chart."""
    reg_data = copy.deepcopy(test_frame)
    # The classification target is dropped before the regression model runs.
    del reg_data['Labeling']
    scored = predict_model(model_train, data=reg_data)
    test_model = scored[['Close', 'Label']]
    actual, predicted = test_model['Close'], test_model['Label']
    mse_ = mean_squared_error(actual, predicted)
    data = {
        'MAE': [mean_absolute_error(actual, predicted)],
        'MSE': [mse_],
        # sqrt(MSE) replaces ``squared=False``, which newer scikit-learn
        # releases no longer accept.
        'RMSE': [mse_ ** 0.5],
        'R2': [r2_score(actual, predicted)],
    }
    score_model = pd.DataFrame(data=data,
                               columns=['MAE', 'MSE', 'RMSE', 'R2'])
    score_model.index.name = "Metrics Score"
    st.write("Test Data Metrics Score")
    st.table(score_model)
    st.write("Forecast Data (Test Data)")
    st.line_chart(test_model)


def _build_prophet_model(target):
    """Return ``(unfitted Prophet model, holiday years)`` for ``target``."""
    if target == 'KOSPI':
        prop_model = Prophet(yearly_seasonality='auto',
                             weekly_seasonality='auto',
                             daily_seasonality='auto',
                             changepoint_prior_scale=0.15,
                             changepoint_range=0.9)
        prop_model.add_country_holidays(country_name='KR')
        return prop_model, (2020, 2021)
    prop_model = Prophet(yearly_seasonality='auto',
                         weekly_seasonality='auto',
                         daily_seasonality='auto',
                         changepoints=_YG_CHANGEPOINTS,
                         changepoint_range=0.85,
                         changepoint_prior_scale=0.2)
    return prop_model, (2019, 2020)


def _show_prophet_forecast(history, target):
    """Fit Prophet on the ``Close`` column and draw the forecast figures."""
    df_prophet = copy.deepcopy(history)
    df_prophet['date'] = pd.to_datetime(df_prophet.index)
    df_data = df_prophet[['date', 'Close']].reset_index(drop=True)
    df_data = df_data.rename(columns={'date': 'ds', 'Close': 'y'})

    prop_model, holiday_years = _build_prophet_model(target)
    prop_model.fit(df_data)

    kor_holidays = pd.concat([
        pd.Series(np.array(SouthKorea().holidays(year))[:, 0])
        for year in holiday_years
    ]).reset_index(drop=True)

    # Forecast ten more days, then drop Saturdays, Sundays and the listed
    # holiday dates.
    prop_future = prop_model.make_future_dataframe(periods=10)
    prop_future = prop_future[prop_future.ds.dt.weekday != 5]
    prop_future = prop_future[prop_future.ds.dt.weekday != 6]
    for kor_holiday in kor_holidays:
        prop_future = prop_future[prop_future.ds != kor_holiday]

    prop_forecast = prop_model.predict(prop_future)
    fig1 = prop_model.plot(prop_forecast)
    fig2 = prop_model.plot_components(prop_forecast)
    st.write("Forecast Data")
    st.write(fig1)
    st.write("Component Wise Forecast")
    st.write(fig2)


def run():
    """Render the KOSPI / YG stock prediction Streamlit page.

    "Online" mode collects data for the chosen date and target, shows
    test-data scores for the AutoML methods, and on button press shows a
    buy/sell advice message. "Batch" mode scores an uploaded CSV file with
    the saved AutoML model selected on the page.
    """
    from PIL import Image

    image = Image.open('logo.jpg')
    image_stock = Image.open('stock.jpg')

    st.image(image, use_column_width=False)
    add_selectbox = st.sidebar.selectbox("예측 방법 결정", ("Online", "Batch"))
    st.sidebar.info('프로젝트명 :' + '\n' + '자연어 처리 기반의 투자분석 및 예측시스템 개발')
    st.sidebar.success('★멘토님★ : 정좌연 PE')
    st.sidebar.info('팀명 : 턴어라운드')
    st.sidebar.success('팀원 : 이지훈, 이문형, 강민재, 구병진, 김서정')
    st.sidebar.image(image_stock)
    st.title("KOSPI 지수 및 YG 종목 주가 예측 모델")

    # User settings
    if add_selectbox == 'Online':
        date = str(
            st.number_input('Date',
                            min_value=20200101,
                            max_value=20201231,
                            value=20201027))
        target = st.selectbox('Target', ['KOSPI', 'YG'])
        method = st.selectbox(
            'Method',
            ['AutoML_CLA', 'AutoML_REG', 'ARIMA', 'Prophet', 'RL', 'NLP'])
        output = ""

        input_ = DataCollectionModel.DataCollection(date)
        prophet_input_ = ProphetModel.Prophet_(date)

        # Collect the data for the selected target.
        if target == 'KOSPI':
            input_df = input_.kospi_collection()
        else:
            input_df = input_.yg_collection()

        if method in ('AutoML_CLA', 'AutoML_REG'):
            deployed_name, train_name = _AUTOML_MODEL_NAMES[(target, method)]
            model = load_model(deployed_name)  # prediction model
            model_train = load_model(train_name)  # evaluation model
            test_frame = input_df[0].iloc[_TEST_START_ROW[target]:]
            if method == 'AutoML_CLA':
                _show_classification_scores(model_train, test_frame)
            else:
                _show_regression_scores(model_train, test_frame)
        else:
            # Placeholder methods: add model loading here when needed.
            if method == 'RL' and target == 'KOSPI':
                # NOTE(review): imported only for its side effects and only
                # for KOSPI, as before - confirm this is intended.
                import main  # noqa: F401
            # The console trace is kept as it was, including the lower-case
            # spelling printed for the YG Prophet branch.
            if method == 'Prophet' and target != 'KOSPI':
                print("prophet")
            else:
                print(method)

        # Run the prediction model.
        if st.button("주가 예측"):
            if method == 'AutoML_CLA':
                output = _advice(date,
                                 predict(model=model, input_df=input_df[0]))
            elif method == 'AutoML_REG':
                output = _advice(date,
                                 predict_reg(model=model, input_df=input_df))
            elif method == 'ARIMA':
                print("ARIMA")
            elif method == 'Prophet':
                _show_prophet_forecast(input_df[0], target)
                if target == 'KOSPI':
                    signal = prophet_input_.prophet_kospi(input_df[0])
                else:
                    signal = prophet_input_.prophet_yg(input_df[0])
                output = _advice(date, signal)

        # Skip the banner when no advice was produced.
        if output:
            st.success(output)

    if add_selectbox == 'Batch':
        # ``model`` is local to this function and was only bound in Online
        # mode, so Batch mode raised UnboundLocalError. The user now picks
        # which saved AutoML model scores the upload.
        target = st.selectbox('Target', ['KOSPI', 'YG'])
        method = st.selectbox('Method', ['AutoML_CLA', 'AutoML_REG'])
        file_upload = st.file_uploader("Upload csv file for predictions",
                                       type=["csv"])

        if file_upload is not None:
            data = pd.read_csv(file_upload)
            model = load_model(_AUTOML_MODEL_NAMES[(target, method)][0])
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)
"""#### No missing values ## Visualization """ plt.figure(figsize=(12,12)) plt.pie(data['Type'].value_counts(), labels=data['Type'].value_counts().index) plt.title('Class Distribution') plt.show() """## Setup, it is like pipeline where we can pass peprocessing and transformation steps.""" setup( data = data, target = 'Type', #target meaning the target feature normalize = True, train_size = 0.7 ) models() best_model = compare_models() """### Extra Trees Classifier and Random Forest are better as shwon by PyCaret.""" best_model predict_model(best_model)
def run():
    """Render the airline passenger satisfaction Streamlit page.

    "single" mode collects one passenger's answers through widgets and
    scores them with the module-level ``model``; "Batch" mode scores every
    row of an uploaded CSV file and prints the resulting frame.
    """
    from PIL import Image

    image = Image.open('airline sats.jfif')
    image_office = Image.open('side.jfif')

    st.image(image, use_column_width=True)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("single", "Batch"))
    st.sidebar.info(
        'This app is created to prdicting Airline Passenger Satisfaction')
    st.sidebar.success('https://www.pycaret.org')
    st.sidebar.image(image_office)
    st.title("Airline satisfaction")

    def score_input(label, lowest=0):
        """Integer input from ``lowest`` to 5 that defaults to ``lowest``."""
        return st.number_input(label,
                               min_value=lowest,
                               max_value=5,
                               value=lowest)

    if add_selectbox == 'single':
        Age = st.number_input('Age', min_value=7, max_value=85, value=7)
        Flight_Distance = st.number_input('Flight_Distance',
                                          min_value=31.0,
                                          max_value=4983.0,
                                          value=31.0)
        Infligh_wifi_service = score_input('Infligh_ wifi_service')
        Departure_Arrival_time_convenient = score_input(
            'Departure_Arrival_time_convenient')
        # The label used to carry a stray "Work_accident" prefix.
        Ease_of_Online_booking = score_input('Ease_of_Online_booking')
        Gate_location = score_input('Gate_location', lowest=1)
        Food_and_drink = score_input('Food_and_drink')
        Online_boarding = score_input('Online_boarding')
        Seat_comfort = score_input('Seat_comfort')
        Inflight_entertainment = score_input('Inflight_entertainment')
        On_board_service = score_input('On_board_service')
        Leg_room_ervice = score_input('Leg room service')
        Baggage_handling = score_input('Baggage handling', lowest=1)
        Checkin_service = score_input('Checkin service')
        Inflight_service = score_input('Inflight service')
        Cleanliness = score_input('Cleanliness')
        Departure_Delay_in_Minutes = st.number_input(
            'Departure Delay in Minutes',
            min_value=0,
            max_value=1305,
            value=0)
        Arrival_Delay_in_Minutes = st.number_input('Arrival Delay in Minutes',
                                                   min_value=0,
                                                   max_value=1280,
                                                   value=0)
        Gender = st.selectbox('Gender', ['Female', 'Male'])
        # Both customer types are options. Previously 'Loyal Customer' was
        # passed as the label and the string 'disloyal Customer' as the
        # options, which offered its individual characters as choices.
        Customer_Type = st.selectbox('Customer Type',
                                     ['Loyal Customer', 'disloyal Customer'])
        Type_of_Travel = st.selectbox('Type of Travel',
                                      ['Business travel', 'Personal Travel'])
        Class = st.selectbox('Class', ['Business', 'Eco', 'Eco Plus', 'Bu'])

        output = ""
        # NOTE(review): the irregular keys ('Infligh_ wifi_service',
        # 'Leg_room _ervice', 'Customer Type') are kept verbatim; presumably
        # they mirror the training columns - confirm against the model.
        input_dict = {
            'Gender': Gender,
            'Customer Type': Customer_Type,
            'Age': Age,
            'Type_of_Travel': Type_of_Travel,
            'Class': Class,
            'Flight_Distance': Flight_Distance,
            'Infligh_ wifi_service': Infligh_wifi_service,
            'Departure_Arrival_time_convenient':
            Departure_Arrival_time_convenient,
            'Ease_of_Online_booking': Ease_of_Online_booking,
            'Gate_location': Gate_location,
            'Food_and_drink': Food_and_drink,
            'Online_boarding': Online_boarding,
            'Seat_comfort': Seat_comfort,
            'Inflight_entertainment': Inflight_entertainment,
            'On_board_service': On_board_service,
            'Leg_room _ervice': Leg_room_ervice,
            'Baggage_handling': Baggage_handling,
            'Checkin_service': Checkin_service,
            'Inflight_service': Inflight_service,
            'Cleanliness': Cleanliness,
            'Departure_Delay_in_Minutes': Departure_Delay_in_Minutes,
            'Arrival_Delay_in_Minutes': Arrival_Delay_in_Minutes,
        }
        input_df = pd.DataFrame([input_dict])

        if st.button("Predict"):
            output = predict(model=model, input_df=input_df)
            output = str(output)

        st.success('The output is {}'.format(output))

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions",
                                       type=["csv"])

        if file_upload is not None:
            data = pd.read_csv(file_upload)
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)
def run():
    """Render the loan verification Streamlit page.

    "Online" mode collects one applicant's details through widgets and
    scores them with the module-level ``model``; "Batch" mode scores every
    row of an uploaded CSV file and prints the resulting frame.
    """
    from PIL import Image

    logo = Image.open('logo.png')
    image_core = Image.open('logo.png')  # opened but not displayed

    st.image(logo, use_column_width=False)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Batch"))
    st.sidebar.info('This app is created to verify qualified customers')
    st.sidebar.success('https://www.pycaret.org')
    st.sidebar.image(logo)
    st.title("Loan Verification App")

    if add_selectbox == 'Online':
        # Dict values are evaluated top to bottom, so the widgets appear on
        # the page in exactly this order.
        applicant = {
            'MaritalStatus_B': st.selectbox(
                'MaritalStatus_B',
                ['Married', 'Single', 'Divorced', 'Widow']),
            'Gender_B': st.selectbox('Gender_B', ['M', 'F']),
            'Location_B': st.selectbox('Location_B',
                                       ['Urban', 'SemiUrban', 'Rural']),
            'EmployemtStatus_B': st.selectbox(
                'EmployemtStatus_B',
                ['Unemployed', 'Worker', 'Employer', 'SelfEmployed']),
            'Credit_score': st.number_input('Credit_score',
                                            min_value=1,
                                            max_value=1000,
                                            value=25),
            'No_of_Dependents': st.number_input('No_of_Dependents',
                                                min_value=1,
                                                max_value=100,
                                                value=25),
            'Age': st.number_input('Age',
                                   min_value=1,
                                   max_value=100,
                                   value=25),
            'Available_balance': st.number_input('Available_balance',
                                                 min_value=1,
                                                 max_value=10000000000,
                                                 value=25),
            'Ledger_balance': st.number_input('Ledger_balance',
                                              min_value=1,
                                              max_value=100000000000000,
                                              value=25),
        }
        input_df = pd.DataFrame([applicant])

        # Stays empty until the button is pressed during this rerun.
        result = ""
        if st.button("Predict"):
            result = str(predict(model=model, input_df=input_df))
        st.success(f'The output is {result}')

    elif add_selectbox == 'Batch':
        uploaded_csv = st.file_uploader("Upload csv file for predictions",
                                        type=["csv"])
        if uploaded_csv is not None:
            batch_frame = pd.read_csv(uploaded_csv)
            st.write(predict_model(estimator=model, data=batch_frame))
def main():
    """Render the house price Streamlit page.

    "Online" mode collects one house's characteristics through widgets and
    scores them with the module-level ``model``; "Batch" mode scores every
    row of an uploaded CSV file and prints the resulting frame.
    """
    from PIL import Image

    icon = Image.open('images/icone.jpg')
    sidebar_picture = Image.open('images/image.png')

    st.image(icon, use_column_width=False)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Batch"))
    st.sidebar.info('This app is created to predict House prices')
    st.sidebar.image(sidebar_picture)
    st.title("Predicting house price")

    if add_selectbox == 'Online':
        lot_area = st.number_input('Lot size in square feet :',
                                   min_value=1300,
                                   max_value=21600,
                                   value=10000)
        quality_choice = st.selectbox(
            'Rates the overall material and finish of the house :', [
                '10 Very Excellent', '9 Excellent', '8 Very Good', '7 Good',
                '6 Above Average', '5 Average', '4 Below Average', '3 Fair',
                '2 Poor', '1 Very Poor'
            ])
        year_built = st.number_input('Original construction date :',
                                     min_value=1872,
                                     max_value=2020,
                                     value=1950)
        year_remodelled = st.number_input('Remodel year :',
                                          min_value=1950,
                                          max_value=2010,
                                          value=1995)
        rooms_above_grade = st.number_input(
            'Total rooms above grade (does not include bathrooms) : ',
            min_value=2,
            max_value=14,
            value=3)
        basement_area = st.number_input(
            'Total square feet of basement area : ',
            min_value=0,
            max_value=7000,
            value=700)
        living_area = st.number_input(
            'Above grade (ground) living area square feet : ',
            min_value=0,
            max_value=7000,
            value=700)
        garage_cars = st.selectbox('Size of garage in car capacity : ',
                                   ['0', '1', '2', '3', '4', '5'])
        exterior_choice = st.selectbox(
            'Evaluates the quality of the material on the exterior : ',
            ['Excellent', 'Good', 'Average/Typical', 'Fair', 'Poor'])
        basement_choice = st.selectbox(
            'Evaluates the height of the basement', [
                'Excellent (100+ inches)', 'Good (90-99 inches)',
                'Typical (80-89 inches)', 'Fair (70-79 inches)',
                'Poor (<70 inches', 'No Basement'
            ])

        # The numeric rating is the first all-digit token of the chosen label.
        overall_quality = next(
            int(token) for token in quality_choice.split()
            if token.isdigit())

        house = {
            'LotArea': lot_area,
            'OverallQual': overall_quality,
            'YearBuilt': year_built,
            'YearRemodAdd': year_remodelled,
            'TotRmsAbvGrd': rooms_above_grade,
            'TotalBsmtSF': basement_area,
            'GrLivArea': living_area,
            'GarageCars': garage_cars,
            # The chosen labels are translated through the module-level
            # lookup dictionaries.
            'ExterQual': dic_ExterQual[exterior_choice],
            'BsmtQual': dic_BsmtQual[basement_choice],
        }
        input_df = pd.DataFrame([house])

        # Stays empty until the button is pressed during this rerun.
        estimate = ""
        if st.button("Predict"):
            estimate = str(predict(model=model, input_df=input_df))
        st.success(f'The estimated parice is : {estimate} $')

    elif add_selectbox == 'Batch':
        uploaded_csv = st.file_uploader("Upload csv file for predictions",
                                        type=["csv"])
        if uploaded_csv is not None:
            batch_frame = pd.read_csv(uploaded_csv)
            st.write(predict_model(estimator=model, data=batch_frame))
def run():
    """Render the employee-leaving Streamlit page.

    "Online" mode collects one employee's features through widgets and
    reports whether the module-level ``model`` predicts they will leave;
    "Batch" mode scores every row of an uploaded CSV file and prints the
    resulting frame.
    """
    from PIL import Image

    # For GitHub, the paths carry no /content/ prefix.
    banner = Image.open("employeeleftimage.jpg")
    sidebar_picture = Image.open('office.jpg')

    st.image(banner, use_column_width=True)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Batch"))
    st.sidebar.info(
        'This app is created to predict if an employee will leave the company')
    st.sidebar.success('https://www.pycaret.org')
    st.sidebar.image(sidebar_picture)
    st.title("Predicting employee leaving")

    if add_selectbox == 'Online':
        # Dict values are evaluated top to bottom, so the widgets appear on
        # the page in exactly this order.
        employee = {
            'satisfaction_level': st.number_input('satisfaction_level',
                                                  min_value=0.1,
                                                  max_value=1.0,
                                                  value=0.1),
            'last_evaluation': st.number_input('last_evaluation',
                                               min_value=0.1,
                                               max_value=1.0,
                                               value=0.1),
            'number_project': st.number_input('number_project',
                                              min_value=0,
                                              max_value=50,
                                              value=5),
            'average_montly_hours': st.number_input('average_montly_hours',
                                                    min_value=0,
                                                    max_value=744,
                                                    value=90),
            'time_spend_company': st.number_input('time_spend_company',
                                                  min_value=1,
                                                  max_value=10,
                                                  value=3),
            'Work_accident': st.number_input('Work_accident',
                                             min_value=0,
                                             max_value=50,
                                             value=0),
            'promotion_last_5years': st.number_input('promotion_last_5years',
                                                     min_value=0,
                                                     max_value=50,
                                                     value=0),
            'department': st.selectbox('department', [
                'accounting', 'hr', 'IT', 'management', 'marketing',
                'product_mng', 'RandD', 'sales', 'support', 'technical'
            ]),
            'salary': st.selectbox('salary', ['low', 'high', 'medium']),
        }
        input_df = pd.DataFrame([employee])

        # Stays empty until the button is pressed during this rerun.
        verdict = ""
        if st.button("Predict"):
            label = predict(model=model, input_df=input_df)
            verdict = "won't leave" if label == 0 else "will leave"
        st.success(f'The prediction output is that employee {verdict}')

    elif add_selectbox == 'Batch':
        uploaded_csv = st.file_uploader("Upload csv file for predictions",
                                        type=["csv"])
        if uploaded_csv is not None:
            batch_frame = pd.read_csv(uploaded_csv)
            st.write(predict_model(estimator=model, data=batch_frame))
'Exercise Type': Exercise_type, 'Smoke': smoker, 'Alcohol drinks/week': Alcohol_drinks_per_week, 'Heart Disease': heart_disease, 'Hypothyrodism ': Hypothyrodism, 'Asthma': Asthma, 'Autoimmune': Autoimmune, 'Depression': Depression, 'High Blood Pressure': High_Blood_Pressure, 'High Cholesterol': High_Cholesterol, 'Thyroid Disease': Thyroid_Disease } input_df = pd.DataFrame([input_dict]) if st.button("Predict"): output = predict(model=model, input_df=input_df) output = str(output) st.success('The diabetic condition of the patient is {}'.format(output)) if add_selectbox == 'Batch': file_upload = st.file_uploader("Upload csv file for predictions", type=["csv"]) if file_upload is not None: data = pd.read_csv(file_upload) predictions = predict_model(estimator=model, data=data) st.write(predictions)
def main():
    """Render the customer churn Streamlit page.

    "Online" mode collects one customer's account details through widgets
    and scores them with the module-level ``model``; "Batch" mode scores
    every row of an uploaded CSV file and prints the resulting frame.
    """
    from PIL import Image

    icon = Image.open('images/icone.png')
    sidebar_picture = Image.open('images/image.png')

    st.image(icon, use_column_width=False)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Batch"))
    st.sidebar.info('This app is created to predict Customer Churn')
    st.sidebar.image(sidebar_picture)
    st.title("Predicting Customer Churn")

    if add_selectbox == 'Online':
        # The leading '' entry is the initially selected (blank) choice.
        state_codes = [
            '', 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
            'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD',
            'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ',
            'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN',
            'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY'
        ]
        yes_no = ['', 'yes', 'no']

        # Dict values are evaluated top to bottom, so the widgets appear on
        # the page in exactly this order.
        customer = {
            'state': st.selectbox(
                'letter code of the US state of customer residence :',
                state_codes),
            'account_length': st.number_input(
                'Number of months the customer has been with the current telco provider :',
                min_value=0,
                max_value=240,
                value=0),
            'area_code': st.selectbox(
                '"area_code_AAA" where AAA = 3 digit area code :',
                ['', 'area_code_408', 'area_code_415', 'area_code_510']),
            'international_plan': st.selectbox(
                'The customer has international plan :', yes_no),
            'voice_mail_plan': st.selectbox(
                'The customer has voice mail plan :', yes_no),
            'number_vmail_messages': st.slider(
                'Number of voice-mail messages. :',
                min_value=0,
                max_value=60,
                value=0),
            'total_day_minutes': st.slider('Total minutes of day calls :',
                                           min_value=0,
                                           max_value=360,
                                           value=100),
            'total_day_calls': st.slider('Total day calls :',
                                         min_value=0,
                                         max_value=200,
                                         value=50),
            'total_eve_minutes': st.slider(
                'Total minutes of evening calls :',
                min_value=0,
                max_value=400,
                value=200),
            'total_eve_calls': st.slider('Total number of evening calls :',
                                         min_value=0,
                                         max_value=200,
                                         value=100),
            'total_night_minutes': st.slider(
                'Total minutes of night calls :',
                min_value=0,
                max_value=400,
                value=200),
            'total_night_calls': st.slider('Total number of night calls :',
                                           min_value=0,
                                           max_value=200,
                                           value=100),
            'total_intl_minutes': st.slider(
                'Total minutes of international calls :',
                min_value=0,
                max_value=60,
                value=0),
            'total_intl_calls': st.slider(
                'Total number of international calls :',
                min_value=0,
                max_value=20,
                value=0),
            'number_customer_service_calls': st.slider(
                'Number of calls to customer service :',
                min_value=0,
                max_value=10,
                value=0),
        }
        input_df = pd.DataFrame([customer])

        # Stays empty until the button is pressed during this rerun.
        churn_label = ""
        if st.button("Predict"):
            churn_label = str(predict(model=model, input_df=input_df))
        st.success(f'Churn : {churn_label}')

    elif add_selectbox == 'Batch':
        uploaded_csv = st.file_uploader("Upload csv file for predictions",
                                        type=["csv"])
        if uploaded_csv is not None:
            batch_frame = pd.read_csv(uploaded_csv)
            st.write(predict_model(estimator=model, data=batch_frame))
def train_trad_ml_baseline(train_set_name, val_set_name, use_eiz=True, demographic_features=False):
    '''
    Trains an ensemble based classifier on a distribution based feature representation of
    EI or EIZ scores to predict whether or not a patient has an NMD.

    The experiment is set up on the training set, then the train/test partitions held in the
    library's global config are overwritten (via set_config) so that the full training set is
    used for fitting and the supplied validation set is used as the hold-out set.

    :param train_set_name: The name of the training set to use
    :param val_set_name: The name of the validation set to use
    :param use_eiz: Whether to use EIZ or raw EI scores
    :param demographic_features: Whether to include demographic features ('Age', 'Sex', 'BMI').
    :return: A dictionary with the path to the stored model ('model_path') and its best
             operating threshold ('best_threshold').
    '''
    additional_features = ['Age', 'Sex', 'BMI'] if demographic_features else []
    # obtain feature representations
    train_set = obtain_feature_rep_ml_experiment(train_set_name, use_eiz=use_eiz, additional_features=additional_features)
    val_set = obtain_feature_rep_ml_experiment(val_set_name, use_eiz=use_eiz, additional_features=additional_features)
    # map the class labels to integers (0 = 'no NMD', 1 = 'NMD')
    train_set['Class'] = train_set['Class'].replace({'no NMD': 0, 'NMD': 1})
    val_set['Class'] = val_set['Class'].replace({'no NMD': 0, 'NMD': 1})
    # use only ensemble models
    models_to_use = models(type='ensemble')
    models_to_use = models_to_use.index.to_list()
    # get the set of all features in the dataset (every column except the target)
    features = set(train_set.columns)
    features.remove('Class')
    # set the experiment up, declaring every feature as numeric
    # NOTE(review): `features` is passed as a set; confirm setup() accepts a set here
    exp = setup(train_set, target='Class', numeric_features=features, html=False, session_id=123, train_size=0.7)
    # sidestep the fact that the lib makes another validation set:
    # manually get the pipeline pycaret uses for transforming the data
    # NOTE(review): index 7 of setup()'s return value is taken to be the preprocessing
    # pipeline; presumably this depends on the installed PyCaret version - confirm
    pipeline = exp[7]
    X_train = train_set.drop(columns='Class')
    # transform into the format pycaret expects
    X_train = pipeline.transform(X_train)
    # overwrite the selected train set to use the entire training set instead
    set_config('X_train', X_train)
    set_config('y_train', train_set['Class'])
    # same logic with the val set, use our own instead of the pre-sliced one
    X_test = val_set.drop(columns='Class')
    # transform and set as the validation set
    X_test = pipeline.transform(X_test)
    # overwrite config
    set_config('X_test', X_test)
    set_config('y_test', val_set['Class'])
    # obtain the best model from the list, sorted by AUC
    best_model = compare_models(whitelist=models_to_use, sort='AUC', n_select=1)
    # interpretability output, get SHAP plots to judge feature importance
    interpret_model(best_model)
    # now, do some additional tuning, compare different hyperparameters, maximize AUC
    best_model = tune_model(best_model, optimize='AUC')
    # interpret the tuned model
    interpret_model(best_model)
    # the path to save the model at
    model_path = get_model_name(train_set_name, use_eiz, demographic_features)
    # save the model
    save_model(best_model, model_path)
    # no data is passed, so predictions are made on the hold-out set, which was
    # replaced with the validation set above; returns the results as a dataframe
    results = predict_model(best_model, verbose=False)
    # get the threshold at which the model performed best on the val set
    best_threshold = evaluate_roc(results['Class'], results['Score'], method='val_set_training')
    return {'best_threshold': best_threshold, 'model_path': model_path}
def do_modeling(self, dataFrame, pipeline_dict):
    """Drive the interactive modeling workflow in the Streamlit page.

    Each stage is gated by a checkbox or button: X/y split, optional scaling,
    optional dimensionality reduction (PCA / LDA), baseline model comparison,
    tuning, finalizing, and finally scoring a test CSV after replaying the
    preprocessing steps recorded in ``pipeline_dict``.

    :param dataFrame: training data; the target column is chosen in the UI.
    :param pipeline_dict: dict of recorded preprocessing steps. This method
        adds the keys 'Scaling', 'PCA_info_loss' and 'LDA_number_components'
        and reads the keys handled in the replay loop below.
    :return: None. Results are written to the page and to 'result.csv'.
    """
    prob_type = st.selectbox('Select type of problem', ['Classification', 'Regression'])
    target_variable = st.selectbox('Select target variable', dataFrame.columns)
    # Short model identifiers, positionally paired with classification_model_names below.
    classification_model_library = [
        'lr', 'knn', 'nb', 'dt', 'svm', 'rbfsvm', 'gpc', 'mlp', 'ridge', 'rf',
        'qda', 'ada', 'gbc', 'lda', 'et', 'xgboost', 'lightgbm', 'catboost'
    ]
    # Models for which interpret_model is called after tuning.
    tree_based_models = [
        'Random Forest Classifier', 'Decision Tree Classifier',
        'Extra Trees Classifier', 'Gradient Boosting Classifier',
        'Extreme Gradient Boosting', 'Light Gradient Boosting Machine',
        'CatBoost Classifier'
    ]
    classification_model_names = [
        'Logistic Regression', 'K Neighbors Classifier', 'Naive Bayes',
        'Decision Tree Classifier', 'SVM - Linear Kernel',
        'SVM - Radial Kernel', 'Gaussian Process Classifier',
        'MLP Classifier', 'Ridge Classifier', 'Random Forest Classifier',
        'Quadratic Discriminant Analysis', 'Ada Boost Classifier',
        'Gradient Boosting Classifier', 'Linear Discriminant Analysis',
        'Extra Trees Classifier', 'Extreme Gradient Boosting',
        'Light Gradient Boosting Machine', 'CatBoost Classifier'
    ]
    # Display name -> short identifier.
    classification_models = dict(zip(classification_model_names, classification_model_library))
    if st.checkbox('X and y Split'):
        X = self.get_features(dataFrame, target_variable)
        y = dataFrame[target_variable]
        st.write('Done!')
        if st.checkbox('X,y Info'):
            st.write(X)
            st.write(y)
        if st.checkbox('Scaling of data'):
            scale_X = self.do_standardScale(X)
            columns = X.columns
            # Record the step so it can be replayed on the test data later.
            pipeline_dict['Scaling'] = True
            # Overwrite each column of X with its scaled values.
            for col in scale_X:
                X[col] = scale_X[col].values
            #X.drop(columns,axis=1,inplace=True)
            st.write(X)
            st.write('Done!')
        if st.checkbox('Dimensionality Reduction'):
            if st.checkbox('PCA'):
                information_loss = st.text_input('Enter Information loss in percentage(%)')
                if st.button('PCA'):
                    pipeline_dict['PCA_info_loss'] = information_loss
                    pca_X = self.dimred_PCA(X, information_loss)
                    columns = X.columns
                    # Add the components as integer-named columns, then drop the originals.
                    for i, val in enumerate(pca_X.T):
                        X[i] = val
                    X.drop(columns, axis=1, inplace=True)
                    st.write('Done!')
            if st.checkbox('LDA'):
                number_components = st.text_input('Enter the number of components')
                if st.button('LDA'):
                    pipeline_dict['LDA_number_components'] = number_components
                    # NOTE(review): number_components is the raw text-input string;
                    # confirm LDA accepts it or whether an int conversion is needed.
                    lda = LDA(n_components=number_components)
                    lda_X = lda.fit_transform(X, y)
                    columns = X.columns
                    # Add the components as integer-named columns, then drop the originals.
                    for i, val in enumerate(lda_X.T):
                        X[i] = val
                    X.drop(columns, axis=1, inplace=True)
                    st.write('Done!')
        if st.checkbox('Start Base-Line modeling Classification'):
            # py_data is the same object as X, so the target column is added to X itself.
            py_data = X
            py_data[target_variable] = y
            st.write('Name :' + str(target_variable))
            st.write('Type :' + str(prob_type))
            if st.checkbox('Start Modeling'):
                exp1 = cl.setup(data=py_data, target=target_variable, session_id=123, silent=True)
                st.write('Compare Models...')
                #models_info = cl.create_model('lr',verbose = False)
                models_info = cl.compare_models()
                st.write(models_info)
                if st.checkbox('Tuning Models'):
                    tuning_model_name = st.selectbox('Select Model for Tuning', classification_model_names)
                    if st.button('Start'):
                        st.write(tuning_model_name)
                        # NOTE(review): tune_model is unpacked into two values here;
                        # presumably that matches the installed library version - confirm.
                        tuned_model, result = cl.tune_model(classification_models[tuning_model_name], verbose=False)
                        st.write(result)
                        if tuning_model_name in tree_based_models:
                            cl.interpret_model(tuned_model)
                            st.pyplot()
                            cl.plot_model(tuned_model, plot='confusion_matrix')
                            st.pyplot()
                        else:
                            cl.plot_model(tuned_model, plot='confusion_matrix')
                            st.pyplot()
                if st.checkbox('Finalize Model'):
                    final_model_name = st.selectbox('Select Model for Tuning', classification_model_names)
                    if st.checkbox('Finalize'):
                        tuned_model, result = cl.tune_model(classification_models[final_model_name], verbose=False)
                        st.write(result)
                        finalize_model = cl.finalize_model(tuned_model)
                        st.write(final_model_name)
                        st.write(finalize_model.get_params())
                        st.write('Done!')
                        st.write(pipeline_dict)
                        url = st.text_input("Enter Test Data Url(Must be csv file)")
                        if st.button('Click'):
                            test_dataFrame = self.get_test_data_csv(url)
                            st.write(test_dataFrame)
                            # Replay every recorded preprocessing step on the test
                            # data, in the iteration order of pipeline_dict.
                            for k, v in pipeline_dict.items():
                                if k == 'Convert_Data_Type':
                                    st.write('Convert_Data_Type')
                                    self.convert_type(test_dataFrame, pipeline_dict['Convert_Data_Type']['column_name'], pipeline_dict['Convert_Data_Type']['data_type'])
                                elif k == 'remove_columns':
                                    st.write('remove_columns')
                                    test_dataFrame.drop(pipeline_dict['remove_columns'], axis=1, inplace=True)
                                elif k == 'remove_columns_threshold':
                                    st.write('remove_columns_threshold..')
                                    for threshold in pipeline_dict['remove_columns_threshold']:
                                        remove_columns = self.remove_null_columns(test_dataFrame, float(threshold))
                                        test_dataFrame.drop(remove_columns, axis=1, inplace=True)
                                elif k == 'Fill_Median_Mode_Columns':
                                    st.write('Fill_Median_Mode_Columns..')
                                    test_dataFrame = self.replace_null_columns(test_dataFrame, pipeline_dict['Fill_Median_Mode_Columns'])
                                elif k == 'Create_Bins':
                                    st.write('Create_Bins..')
                                    column = pipeline_dict['Create_Bins']['column_Name']
                                    bins = pipeline_dict['Create_Bins']['Numbers_bin']
                                    # column[i] is binned with bins[i] bins.
                                    for i, c in enumerate(column):
                                        test_dataFrame[c] = self.do_bining(test_dataFrame, c, int(bins[i]))
                                elif k == 'OneHotEncoding':
                                    st.write('OneHotEncoding..')
                                    list_columns = pipeline_dict['OneHotEncoding']
                                    for col in list_columns:
                                        tempdf = pd.get_dummies(data=test_dataFrame[col])
                                        # Dummy columns are named '<column>_<value>'.
                                        for in_col in tempdf.columns:
                                            colName = str(col) + '_' + str(in_col)
                                            test_dataFrame[colName] = tempdf[in_col].values
                                    test_dataFrame.drop(list_columns, axis=1, inplace=True)
                                elif k == 'LabelEncoding':
                                    st.write('LabelEncoding..')
                                    test_dataFrame = self.do_label_Encoding(test_dataFrame, pipeline_dict['LabelEncoding'])
                                elif k == 'BinaryEncoding':
                                    st.write('BinaryEncoding..')
                                    binary_encoding_columns = pipeline_dict['BinaryEncoding']
                                    for col in binary_encoding_columns:
                                        encoder = ce.BinaryEncoder(cols=[col])
                                        # NOTE(review): the encoder is fit on and transforms
                                        # `dataFrame` (the training frame), yet the result is
                                        # written into test_dataFrame - confirm this is
                                        # intended and that the row counts match.
                                        dfbin = encoder.fit_transform(dataFrame[col])
                                        # NOTE(review): this inner loop rebinds the outer `col`.
                                        for col in dfbin.columns:
                                            test_dataFrame[col] = dfbin[col].values
                                    test_dataFrame.drop(binary_encoding_columns, axis=1, inplace=True)
                                elif k == 'Scaling':
                                    st.write('Scaling..')
                                    scale_X = self.do_standardScale(test_dataFrame)
                                    # NOTE(review): `columns` is assigned but not used in this branch.
                                    columns = test_dataFrame.columns
                                    for col in scale_X:
                                        test_dataFrame[col] = scale_X[col].values
                            st.write(test_dataFrame)
                            # Score the preprocessed test data with the finalized model
                            # and save the result to 'result.csv'.
                            unseen_predictions = cl.predict_model(finalize_model, data=test_dataFrame)
                            st.write(unseen_predictions.head())
                            unseen_predictions.to_csv('result.csv')
def predict_match(data:MentorMenteeDetails):
    """Score one mentor/mentee pairing with the saved LightGBM model.

    The request is turned into a one-row frame, cleaned and feature-engineered,
    combined with the topic columns and scored by ``predict_model``.

    Note: the topic columns are filled with random integers in [0, 5) by
    ``np.random.randint``; no topic model is run here.

    :param data: request payload; ``data.dict()`` supplies the raw columns.
    :return: ``{'prediction': score}``, the 'Score' value of the first row.
    """
    # One-row frame from the payload, with the row index stored in 'id'.
    request_frame = pd.DataFrame([data.dict()])
    request_frame['id'] = list(request_frame.index)

    cleaned = data_cleaning(request_frame)
    engineered = feature_enginnering(cleaned)

    # Placeholder topic features: 24 random integers.
    topic_columns = [
        'Topic_2_mentee_major', 'Topic_3_mentor_major',
        'Topic_2_mentee_help_topics', 'Topic_1_mentee_major',
        'Topic_0_mentor_help_topics', 'Topic_0_mentee_major',
        'Topic_1_mentor_help_topics', 'Topic_2_mentee_experitse',
        'Topic_0_mentor_experitse', 'Topic_0_mentee_help_topics',
        'Topic_2_mentor_help_topics', 'Topic_1_mentee_experitse',
        'Topic_0_mentee_experitse', 'Topic_2_mentor_experitse',
        'Topic_0_mentor_major', 'Topic_1_mentor_experitse',
        'Topic_3_mentee_major', 'Topic_3_mentor_help_topics',
        'Topic_3_mentee_help_topics', 'Topic_1_mentee_help_topics',
        'Topic_2_mentor_major', 'Topic_1_mentor_major',
        'Topic_3_mentee_experitse', 'Topic_3_mentor_experitse',
    ]
    topic_frame = pd.DataFrame(
        data=np.random.randint(5, size=(1, 24)),
        columns=topic_columns,
    )

    # Left-join the engineered features onto the ids, then prepend the topics.
    joined = pd.merge(cleaned[['id']], engineered, on=['id'], how='left')
    model_input = pd.concat([topic_frame, joined], axis=1)

    # Impute missing values with 0 and drop the helper id column.
    model_input = model_input.fillna(0)
    model_input = model_input.drop(['id'], axis=1)

    from pycaret.classification import load_model, predict_model

    estimator = load_model('final_lightgbm_model_06Aug2021')
    scored = predict_model(estimator, data=model_input)
    return {'prediction': scored['Score'][0]}
def iaJob(self):
    """Load the saved 'Prod_model' and return its predictions for ``self.df_inc``."""
    return predict_model(load_model('Prod_model'), data=self.df_inc)
st.sidebar.header('User Input Features')
st.sidebar.markdown("""
[Example CSV input file](https://github.com/LangatGilbert/100daysofcode/blob/master/Employee%20turnover%20prediction/data/example_hr.csv)
""")

# Collect the user's input features from an uploaded CSV file.
# file_uploader returns None until the user has actually uploaded something.
uploaded_file = st.sidebar.file_uploader("Upload your input CSV file",
                                         type=["csv"])

# Display the user input features. The CSV is parsed only once a file exists;
# reading a None handle would raise before the page is rendered.
st.subheader('User Input features')
input_df = None
if uploaded_file is not None:
    input_df = pd.read_csv(uploaded_file)
    st.write(input_df)
else:
    st.write('Awaiting CSV file to be uploaded.')

# Read the saved classification model.
deployment_28042020 = load_model('../model/employees_churn_model')

# Predict on the parsed DataFrame (not on the raw upload handle) and compute
# the AUC against the 'left' column.
new_prediction = None
if input_df is not None:
    new_prediction = predict_model(deployment_28042020, data=input_df)
    check_metric(new_prediction['left'], new_prediction['Label'], metric='AUC')
def detect(mushroom: Mushroom):
    """Predict for one mushroom and return the human-readable result.

    ``mushroom.to_dict()`` is converted to a DataFrame, scored with
    ``predict_model`` using the module-level ``model`` and passed through
    ``make_human_readable``.
    """
    frame = pd.DataFrame(mushroom.to_dict())
    return make_human_readable(predict_model(model, frame))
# In[15]: # Next, you can evaluate your model by looking at the visualization of the ROC curve, # feature importance, or confusion matrix of your model. evaluate_model(et_model) # Click on each of the buttons on the Plot Type and see the magic # We can now use our ```Extra Tree classifier``` to predict the test data that has been generated by PyCaret. As mentioned earlier, soon after we executed the setup() function at the very first step, PyCaret will automatically split our data into training data and test data. All of the model performance and evaluation metrics that we’ve seen above are solely based on the training data. # # To use the model to predict the test data, we can use the predict_model function # In[16]: predict_model(et_model) # ### Test Data # # Make sure that the number of rows above = 480 which is the rows in the test data when PyCaret automatically split it into train and test data set. # In[17]: # Let us save the model save_model(et_model, model_name='extra_tree_model') # ## Build the Web App with Streamlit # # Now it’s time for us to build our wine classifier web app. In this post, we’re going to use Streamlit to build the web app as it is more beginner friendly than Flask. # # ```pip install streamlit```
def predict_api():
    """Score the JSON body of the current request and return the label as JSON.

    The body is parsed with ``force=True``, wrapped into a one-row DataFrame
    and scored with ``predict_model``; the first 'Label' value is returned.
    """
    payload = request.get_json(force=True)
    scored = predict_model(model, data=pd.DataFrame([payload]))
    return jsonify(scored["Label"][0])
def run():
    """Render the employee-attrition prediction page.

    ``Online`` mode scores a single employee described through widgets with
    ``predict``; ``Batch`` mode scores an uploaded CSV with ``predict_model``.
    """
    from PIL import Image

    banner = Image.open('Employee.png')
    sidebar_picture = Image.open('office.jpg')
    st.image(banner, use_column_width=False)

    mode = st.sidebar.selectbox("How would you like to predict?",
                                ("Online", "Batch"))
    st.sidebar.info(
        'This app is created to predict if an employee will leave the company')
    st.sidebar.success('https://www.pycaret.org')
    st.sidebar.image(sidebar_picture)
    st.title("Predicting employee leaving")

    if mode == 'Online':
        # Dict entries are evaluated top to bottom, so the widgets appear in
        # this order on the page.
        features = {
            'satisfaction_level': st.number_input(
                'satisfaction_level', min_value=0.1, max_value=1.0, value=0.1),
            'last_evaluation': st.number_input(
                'last_evaluation', min_value=0.1, max_value=1.0, value=0.1),
            'number_project': st.number_input(
                'number_project', min_value=0, max_value=50, value=5),
            'time_spend_company': st.number_input(
                'time_spend_company', min_value=1, max_value=10, value=3),
            'Work_accident': st.number_input(
                'Work_accident', min_value=0, max_value=50, value=0),
            'promotion_last_5years': st.number_input(
                'promotion_last_5years', min_value=0, max_value=50, value=0),
            'salary': st.selectbox('salary', ['low', 'high', 'medium']),
        }
        input_df = pd.DataFrame([features])

        output = ""
        if st.button("Predict"):
            output = str(predict(model=model, input_df=input_df))
        # Shown on every rerun; the result part is empty until a prediction is made.
        st.success('The output is {}'.format(output))
    elif mode == 'Batch':
        uploaded = st.file_uploader("Upload csv file for predictions",
                                    type=["csv"])
        if uploaded is not None:
            batch = pd.read_csv(uploaded)
            st.write(predict_model(estimator=model, data=batch))
def run():
    """Render the personal-loan eligibility page.

    ``Online`` mode collects one applicant through widgets, scores the row
    with ``predict`` and shows an eligibility message; ``Batch`` mode scores
    an uploaded CSV with ``predict_model``.
    """
    from PIL import Image

    banner = Image.open('Personal_Loan.jpg')
    sidebar_picture = Image.open('bank.jpg')
    st.image(banner, use_column_width=True)

    mode = st.sidebar.selectbox("How would you like to predict?",
                                ("Online", "Batch"))
    st.sidebar.info(
        'This app is created to predict if customer is eligible for personal loan or not'
    )
    st.sidebar.success('https://www.pycaret.org')
    st.sidebar.image(sidebar_picture)
    st.title("Predicting chance to have personal loan")

    if mode == 'Online':
        # (column, lower bound, upper bound) in display order. Every numeric
        # widget uses its column name as label and defaults to 1.0; a bound of
        # None marks the one categorical field.
        field_specs = (
            ('ID', 1.0, 10000.0),
            ('Age', 1.0, 70.0),
            ('Experience', 1.0, 50.0),
            ('Income', 1.0, 500.0),
            ('ZIP Code', 1.0, 100000.0),
            ('Family', 1.0, 10.0),
            ('CCAvg', 0.0, 10.0),
            ('Education', None, None),
            ('Mortgage', 0.0, 1000.0),
            ('Securities Account', 0.0, 1.0),
            ('CD Account', 0.0, 1.0),
            ('Online', 0.0, 1.0),
            ('CreditCard', 0.0, 1.0),
        )
        applicant = {}
        for column, lower, upper in field_specs:
            if lower is None:
                applicant[column] = st.selectbox(
                    'Education',
                    ['Undergrad', 'Graduate', 'Advanced/Professional'])
            else:
                applicant[column] = st.number_input(
                    column, min_value=lower, max_value=upper, value=1.0)
        input_df = pd.DataFrame([applicant])

        output = ""
        if st.button(" predict eligible or not"):
            verdict = str(predict(model=model, input_df=input_df))
            output = ("SORRY! YOU ARE NOT ELIGIBLE FOR PERSONAL LOAN"
                      if verdict == '0' else
                      "CONGRATS! YOU ARE ELIGIBLE FOR PERSONAL LOAN")
        st.success('The Prediction -- {}'.format(output))
    elif mode == 'Batch':
        uploaded = st.file_uploader("Upload csv file for predictions",
                                    type=["csv"])
        if uploaded is not None:
            batch = pd.read_csv(uploaded)
            st.write(predict_model(estimator=model, data=batch))
def run():
    """Render the employee-churn prediction app.

    ``Single`` mode reads one employee from sidebar widgets and shows the
    label returned by ``predict``. ``Batch`` mode scores an uploaded CSV with
    ``predict_model``, shows the first rows and the accuracy against the
    'left' column, and offers the predictions as a CSV download link.
    """
    from PIL import Image

    # Both pictures are re-saved as RGB PNG files and then reopened.
    Image.open('./image/download.png').convert('RGB').save('logo.png')
    logo = Image.open('logo.png')
    Image.open('ian-schneider-unsplash.jpg').convert('RGB').save(
        'employee-turnover-ian-schneider-unsplash.png')
    cover = Image.open('employee-turnover-ian-schneider-unsplash.png')

    mode = st.sidebar.selectbox("How would you like to input features?",
                                ("Single", "Batch"))
    st.sidebar.markdown("""
[Example CSV input file](https://github.com/LangatGilbert/100daysofcode/blob/master/Employee%20turnover%20prediction/data/example_hr.csv)
""")

    st.title("Employee Churn Prediction App")
    st.image(cover)
    st.write("""
Photo by [Ian.Schneider](https://unsplash.com/photos/TamMbr4okv4) on Unsplash.
Among all the multiple aspects of Human Resource functions, attrition is painful and dreadful thing which an organization has to face inevitably.
Along with employee, valuable knowledge built over the period also walks out of the door.
The objective of this study is to predict whether an employee is going to stay or leave using LightGBM machine learning model.
We will calculate the probability of an employee leaving/resigning which is the converted to label 0 or 1 where 0 means employee stays and 1 employee leaves.
""")

    if mode == 'Single':
        side = st.sidebar
        # Dict entries are evaluated in order, which fixes the widget order.
        features = {
            'satisfaction_level': side.number_input(
                'Satisfaction Level', min_value=0.1, max_value=1.0, value=0.5),
            'last_evaluation': side.number_input(
                'Last Evaluation', min_value=1, max_value=100, value=25),
            'number_project': side.number_input(
                'Projects', min_value=1, max_value=50, value=10),
            'average_montly_hours': side.number_input(
                'Average Monthly Hours', min_value=50, max_value=400, value=200),
            'time_spend_company': side.number_input(
                'Time Spent', min_value=1, max_value=30, value=10),
            'Work_accident': side.selectbox('Work Accident', [0, 1]),
            'promotion_last_5years': side.selectbox('Promotion', [0, 1]),
            'dept': side.selectbox('Department', [
                'accounting', 'hr', 'IT', 'management', 'marketing',
                'product_mng', 'RandD', 'sales', 'support', 'technical'
            ]),
            'salary': side.selectbox('Salary', ['high', 'low', 'medium']),
        }
        input_df = pd.DataFrame([features])

        output = ""
        if side.button("Predict"):
            output = 'Label = ' + str(predict(model=model, input_df=input_df))

        st.subheader("Model Prediction")
        st.write(output)
        st.write('---')

    if mode == 'Batch':
        uploaded = st.sidebar.file_uploader("Upload csv file for predictions",
                                            type=["csv"])
        if uploaded is not None:
            batch = pd.read_csv(uploaded)
            predictions = predict_model(estimator=model, data=batch)

            # Check the model accuracy on the unseen dataset.
            st.subheader("Model Predictions on the batch data")
            st.write(predictions.head())
            st.subheader("Model Accuracy")
            st.write(check_metric(predictions['left'], predictions['Label'],
                                  metric='Accuracy'))
            st.write('----')

            # Offer the predictions as a base64-encoded CSV download link.
            csv = predictions.to_csv(index=False)
            b64 = base64.b64encode(csv.encode()).decode()
            href = f'<a href = "data:file/csv;base64,{b64}" download = "predictions.csv">Download the Predictions</a>'
            st.markdown(href, unsafe_allow_html=True)

    st.sidebar.info('This app is created by Gilbert Langat to predict employees churn in organization XYZ. Data used is app is obtained from [Kaggle](https://www.kaggle.com/arvindbhatt/hrcsv)')
    st.sidebar.image(logo)
    st.sidebar.success('https://www.pycaret.org')
def classificador(modelo, dados):
    """Return the result of ``predict_model`` for estimator ``modelo`` on ``dados``."""
    return predict_model(estimator=modelo, data=dados)
def run():
    """Render the employee-attrition prediction page.

    ``Realtime`` mode scores a single employee described through widgets with
    ``predict``; ``Batch`` mode scores an uploaded CSV with ``predict_model``.
    """
    from PIL import Image
    image = Image.open(
        'C:/Users/Lenovo/Downloads/Case-Study-22-Model-Deployment (1)/Case Study/employee.jpeg'
    )
    image_office = Image.open(
        'C:/Users/Lenovo/Downloads/Case-Study-22-Model-Deployment (1)/Case Study/office.jpeg'
    )
    st.image(image, use_column_width=False)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Realtime", "Batch"))
    st.sidebar.info('This app predicts if an employee will leave the company')
    st.sidebar.image(image_office)
    st.title("Predicting employee leaving")

    if add_selectbox == 'Realtime':
        satisfaction_level = st.number_input('satisfaction_level',
                                             min_value=0.1,
                                             max_value=1.0,
                                             value=0.1)
        last_evaluation = st.number_input('last_evaluation',
                                          min_value=0.1,
                                          max_value=1.0,
                                          value=0.1)
        number_project = st.number_input('number_project',
                                         min_value=0,
                                         max_value=50,
                                         value=5)
        # Monthly hours are a count of hours, not a 0-1 ratio: the previous
        # 0.1-1.0 bounds (copied from the satisfaction field) made realistic
        # values impossible to enter.
        average_montly_hours = st.number_input('average_montly_hours',
                                               min_value=50,
                                               max_value=400,
                                               value=200)
        time_spend_company = st.number_input('time_spend_company',
                                             min_value=1,
                                             max_value=10,
                                             value=3)
        Work_accident = st.number_input('Work_accident',
                                        min_value=0,
                                        max_value=50,
                                        value=0)
        promotion_last_5years = st.number_input('promotion_last_5years',
                                                min_value=0,
                                                max_value=50,
                                                value=0)
        department = st.selectbox('department', [
            'sales', 'accounting', 'hr', 'technical', 'support', 'management',
            'IT', 'product_mng', 'marketing', 'RandD'
        ])
        salary = st.selectbox('salary', ['low', 'high', 'medium'])

        output = ""
        input_dict = {
            'satisfaction_level': satisfaction_level,
            'last_evaluation': last_evaluation,
            'number_project': number_project,
            'average_montly_hours': average_montly_hours,
            'time_spend_company': time_spend_company,
            'Work_accident': Work_accident,
            'promotion_last_5years': promotion_last_5years,
            'department': department,
            'salary': salary
        }
        input_df = pd.DataFrame([input_dict])

        if st.button("Predict"):
            output = predict(model=model, input_df=input_df)
            output = str(output)
        # Shown on every rerun; the result part is empty until a prediction is made.
        st.success('The output is {}'.format(output))

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions",
                                       type=["csv"])
        if file_upload is not None:
            data = pd.read_csv(file_upload)
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)
def run():
    """Render the office-room occupancy prediction page.

    Sidebar modes:

    * ``Online`` - a date and sensor readings are collected, turned into a
      one-row DataFrame and scored with ``predict``; the room is reported as
      'Occupied' (1) or 'Available' (0).
    * ``Batch`` - an uploaded CSV is scored with ``predict_model``.
    * ``About`` - credits and a re-run button.
    """
    from PIL import Image

    image = Image.open('dsp3.jpeg')
    image = image.resize((300, 100))
    image_meeting_room = Image.open('meeting room.JPG')
    image_meeting_room = image_meeting_room.resize((300, 100))

    st.sidebar.image(image)
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Batch", "About"))
    st.sidebar.info(
        'This app is created to predict Office Room occupancy prediction')
    st.sidebar.image(image_meeting_room)
    st.title("Office Room occupancy prediction")

    if add_selectbox == 'Online':
        input_user = st.date_input('User input Date')
        user_year = input_user.year
        user_month = input_user.month
        user_day = input_user.day
        # weekday() is Monday=0 ... Sunday=6, so the weekend is 5 (Saturday)
        # and 6 (Sunday). The previous `> 5` test flagged Sunday only.
        user_weekend1 = 1 if input_user.weekday() >= 5 else 0

        user_temperature = st.number_input('Temperature',
                                           min_value=1,
                                           value=23)
        user_humidity = st.number_input('Humidity', min_value=1, value=27)
        user_light = st.number_input('Light in Lux', min_value=1, value=460)
        user_co2 = st.number_input('CO2 in ppm', min_value=1, value=1040)
        user_HumidityRatio = st.slider('Humidity ratio',
                                       min_value=0.000,
                                       max_value=1.000,
                                       step=0.001,
                                       value=0.004,
                                       format="%.3f")

        user_df_data = [[
            user_year, user_month, user_weekend1, user_day, user_temperature,
            user_humidity, user_light, user_co2, user_HumidityRatio
        ]]
        user_df_colnames = [
            "Year", "Month", "weekend", "day", "Temperature", "Humidity",
            "Light", "CO2", "HumidityRatio"
        ]
        input_df = pd.DataFrame(user_df_data, columns=user_df_colnames)

        if st.button("Predict"):
            output = predict(model=model, input_df=input_df)
            # Map the numeric label to text; anything other than 0/1 is
            # reported as unknown.
            final_label = np.where(
                output == 1, 'Occupied',
                np.where(output == 0, "Available", "???????"))
            # Report only once a prediction exists.
            st.success(f'The Room will be {final_label}')

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions",
                                       type=["csv"])
        if file_upload is not None:
            data = pd.read_csv(file_upload)
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)

    if add_selectbox == 'About':
        st.subheader("Built with Streamlit and Pycaret")
        st.subheader("Hunaidkhan Pathan")
        st.subheader("https://www.linkedin.com/in/hunaidkhan/")
        st.button("Re-run")
# EDA plots phik_corr = df.phik_matrix() correlogram = sns.heatmap(phik_corr) barchart = px.histogram(df, x='PAY_0', color='default payment next month', barmode='group') col1, col2 = st.columns(2) col1.write(correlogram.figure) col2.write(barchart) plt.clf() # feature importance plot feature_importances = pd.Series(lgbm.feature_importances_, index=pyclf.get_config('X').columns) feat_imp_plot = feature_importances.nlargest(20).plot(kind='barh') feat_imp_plot.invert_yaxis() col1, col2 = st.columns(2) col1.write(feat_imp_plot.figure) # confusion matrix plot predictions = pyclf.predict_model(lgbm, df) cm = plot_confusion_matrix( confusion_matrix(df['default payment next month'], predictions['Label'])) col2.write(cm[1].figure)
def run():
    """Render the customer-churn prediction page.

    ``Online`` mode collects one customer through widgets and scores the row
    with ``predict_churn``; ``Batch`` mode scores an uploaded CSV with
    ``predict_model``.
    """
    from PIL import Image
    image_2 = Image.open('churn_image.jpeg')
    add_selectbox = st.sidebar.selectbox("How would you like to predict?",
                                         ("Online", "Batch"))
    st.sidebar.image(image_2)
    st.sidebar.info('This app is created to predict if a customer will churn')
    st.sidebar.success('https://www.github.com/pereira94')

    if add_selectbox == 'Online':
        tenure = st.number_input('Tenure in Months',
                                 min_value=0,
                                 max_value=80,
                                 value=7)
        MonthlyCharges = st.number_input('Monthly Charges',
                                         min_value=18,
                                         max_value=119,
                                         value=65)
        gender = st.selectbox('Gender', ['Male', 'Female'])
        MultipleLines = st.selectbox('Multiple lines?',
                                     ['No', 'Yes', 'No phone service'])
        InternetService = st.selectbox('Internet Service?',
                                       ['Fiber optic', 'DSL', 'No'])
        OnlineSecurity = st.selectbox('Online Security',
                                      ['Yes', 'No', 'No internet service'])
        OnlineBackup = st.selectbox('Online Backup?',
                                    ['No', 'Yes', 'No internet service'])
        DeviceProtection = st.selectbox('Device Protection?',
                                        ['No', 'Yes', 'No internet service'])
        TechSupport = st.selectbox('Tech Support?',
                                   ['No', 'Yes', 'No internet service'])
        StreamingTV = st.selectbox('Streaming TV?',
                                   ['No', 'Yes', 'No internet service'])
        StreamingMovies = st.selectbox('Streaming Movies?',
                                       ['No', 'Yes', 'No internet service'])
        # Contract is a contract length. The previous list offered
        # 'No internet service' (copied from the widgets above) in place of
        # the one-year option, so that contract could not be selected.
        Contract = st.selectbox('Contract?',
                                ['Month-to-month', 'One year', 'Two year'])
        PaymentMethod = st.selectbox('Payment Method', [
            'Electronic check', 'Mailed check', 'Bank transfer (automatic)',
            'Credit card (automatic)'
        ])

        # SeniorCitizen is sent as 1/0; the other flags are sent as booleans.
        SeniorCitizen = 1 if st.checkbox('Senior Citizen?') else 0
        Partner = bool(st.checkbox('Has Partner?'))
        Dependents = bool(st.checkbox('Has Dependents?'))
        PhoneService = bool(st.checkbox('Has Phone Service?'))
        PaperlessBilling = bool(st.checkbox('Paperless Billing?'))

        output = ""
        input_dict = {
            'tenure': tenure,
            'MonthlyCharges': MonthlyCharges,
            'gender': gender,
            'MultipleLines': MultipleLines,
            'InternetService': InternetService,
            'OnlineSecurity': OnlineSecurity,
            'OnlineBackup': OnlineBackup,
            'DeviceProtection': DeviceProtection,
            'TechSupport': TechSupport,
            'StreamingTV': StreamingTV,
            'StreamingMovies': StreamingMovies,
            'Contract': Contract,
            'PaymentMethod': PaymentMethod,
            'SeniorCitizen': SeniorCitizen,
            'Partner': Partner,
            'Dependents': Dependents,
            'PhoneService': PhoneService,
            'PaperlessBilling': PaperlessBilling
        }
        df = pd.DataFrame([input_dict])

        if st.button("Predict"):
            output = predict_churn(model=model, df=df)
        # Shown on every rerun; the answer is empty until a prediction is made.
        st.success('Will the customer leave? {}'.format(output))

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions",
                                       type=["csv"])
        if file_upload is not None:
            data = pd.read_csv(file_upload)
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)
def run():
    """Render the KOSPI / YG stock-direction prediction app.

    The sidebar selects one of two modes:

    * ``Online`` - choose a date, a target (``KOSPI`` or ``YG``) and a
      method. Pressing the button produces a buy/sell advice string for the
      ``AutoML`` and ``Prophet`` methods; the remaining methods produce no
      prediction and leave the output empty.
    * ``Batch`` - upload a CSV file, run ``predict_model`` on it and display
      the resulting frame.

    Relies on the module-level names ``st``, ``pd``, ``DataCollectionModel``,
    ``ProphetModel``, ``load_model``, ``predict``, ``predict_model`` and
    (Batch mode only) ``model``.
    """
    from PIL import Image

    image = Image.open('logo.jpg')
    image_stock = Image.open('stock.jpg')

    st.image(image, use_column_width=False)
    add_selectbox = st.sidebar.selectbox("예측 방법 결정", ("Online", "Batch"))
    st.sidebar.info('프로젝트명 :' + '\n' + '자연어 처리 기반의 투자분석 및 예측시스템 개발')
    st.sidebar.success('팀명 : 턴어라운드')
    st.sidebar.success('팀원 : 이지훈, 이문형, 강민재, 구병진, 김서정')
    st.sidebar.image(image_stock)
    st.title("코스피 지수 예측 모델")

    # User settings
    if add_selectbox == 'Online':
        date = str(
            st.number_input('Date',
                            min_value=20200101,
                            max_value=20201231,
                            value=20201006))
        target = st.selectbox('Target', ['KOSPI', 'YG'])
        method = st.selectbox('Method',
                              ['AutoML', 'ARIMA', 'Prophet', 'RL', 'NLP'])

        input_ = DataCollectionModel.DataCollection(date)
        prophet_input_ = ProphetModel.Prophet_(date)

        # Data collection + training-data preparation.
        # The AutoML estimator is kept in `online_model` rather than `model`:
        # assigning to `model` here would make it a function-local name and
        # the Batch branch below would fail with UnboundLocalError.
        online_model = None
        if target == 'KOSPI':
            input_df = input_.kospi_collection()
            if method == 'AutoML':
                online_model = load_model('deployment_20201020')
            # Prophet: add model loading here if it becomes necessary.
        else:
            input_df = input_.yg_collection()
            if method == 'AutoML':
                online_model = load_model('deployment_yg_20201020')

        # Stock price prediction
        output = ""
        if st.button("주가 예측"):
            if method in ('AutoML', 'Prophet'):
                # The collection result is indexed: element 1 feeds AutoML,
                # element 0 feeds Prophet.
                if method == 'AutoML':
                    result = predict(model=online_model, input_df=input_df[1])
                elif target == 'KOSPI':
                    result = prophet_input_.prophet_kospi(input_df[0])
                else:
                    result = prophet_input_.prophet_yg(input_df[0])

                # Normalise to str before comparing: the helpers' return type
                # is not visible here, and an integer 1 would never equal '1'.
                if str(result) == '1':
                    output = date + "주가 상승 예상 -> 매매 어드바이스 : 매수"
                else:
                    output = date + "주가 하락 예상 -> 매매 어드바이스 : 매도"
        # Rendered on every rerun; empty until a supported method has run.
        st.success(output)

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions",
                                       type=["csv"])
        if file_upload is not None:
            data = pd.read_csv(file_upload)
            # NOTE(review): uses the module-level `model`; confirm one is
            # defined for batch scoring.
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)
def predict_quality(model, df):
    """Return the predicted label for the first row of ``df``.

    Args:
        model: Estimator forwarded to ``predict_model``.
        df: Input data forwarded to ``predict_model``.

    Returns:
        The first value of the ``'Label'`` column in the prediction output.
    """
    predictions_data = predict_model(estimator=model, data=df)
    # Positional access: ``['Label'][0]`` is a label lookup and raises
    # KeyError when the prediction frame's index does not contain 0.
    return predictions_data['Label'].iloc[0]
def run():
    """Render the credit-loan prediction app.

    The sidebar selects one of two modes:

    * ``Online`` - collect one applicant's attributes through widgets, build
      a one-row DataFrame and, when "Predict" is pressed, pass it to
      ``predict``; the stringified result is shown in the message box.
    * ``Batch`` - upload a CSV file, run ``predict_model`` on it and display
      the resulting frame.

    Relies on the module-level names ``st``, ``pd``, ``model``, ``predict``
    and ``predict_model``.
    """
    mode = st.sidebar.selectbox("How would you like to predict?",
                                ("Online", "Batch"))
    st.sidebar.info('This app is created to predict credit classification')
    st.sidebar.success('https://www.linkedin.com/in/felipe-sembay/')
    st.title("Credit Loan Prediction App")

    if mode == 'Online':
        # Widgets are created in display order; the dict keeps that order.
        features = {
            'monthly_income': st.number_input('monthly_income',
                                              min_value=1,
                                              max_value=1000000,
                                              value=2000),
            'cpf_restriction': 1 if st.checkbox('cpf_restriction') else 0,
            'loan_amount': st.number_input('loan_amount',
                                           min_value=1,
                                           value=5000),
            'auto_debt': st.number_input('auto_debt',
                                         min_value=0,
                                         max_value=1000000,
                                         value=0),
            'declares_income_tax':
                1 if st.checkbox('declares_income_tax') else 0,
            'idade': st.number_input('idade',
                                     min_value=18,
                                     max_value=120,
                                     value=30),
        }
        input_df = pd.DataFrame([features])

        # Empty until the button is pressed; the messages below are rendered
        # on every rerun.
        result_text = ""
        if st.button("Predict"):
            result_text = str(predict(model=model, input_df=input_df))

        st.success(
            'A probabilidade do seu empréstimo ser liberado é de {}'.format(
                result_text))
        st.markdown(
            "Valores acima de 0.50 são classificados como liberados; valores inferiores a 0.50 são classificados como negados."
        )

    if mode == 'Batch':
        uploaded = st.file_uploader("Upload csv file for predictions",
                                    type=["csv"])
        if uploaded is not None:
            batch_frame = pd.read_csv(uploaded)
            st.write(predict_model(estimator=model, data=batch_frame))