def _store_regresser(self, regression_model):
    """Save ``regression_model`` into the task's output directory.

    The target path is ``<self.task.output_dir>/regressor_<self.task.pycaret_model>``
    and is handed to ``regression.save_model`` unchanged.

    Args:
        regression_model: the model object to persist.
    """
    # NOTE(review): the path carries no file extension here; presumably
    # ``save_model`` adds its own suffix - confirm against the library docs.
    regressor_output_path = os.path.join(
        self.task.output_dir,
        f"regressor_{self.task.pycaret_model}",
    )
    regression.save_model(regression_model, regressor_output_path)
def write(state):
    """Render the Streamlit prediction page for the model held in ``state``.

    The sidebar selects one of three modes:

    * ``"Online"``    - predict a single record built by
      ``retrieve_train_element``.
    * ``"Batch"``     - predict every row of an uploaded csv/xlsx file and
      optionally offer the result as a download.
    * ``"SaveModel"`` - save the (optionally finalized) model and offer it as
      a download; optionally delete the ``*.pkl`` files afterwards.

    When ``state.trained_model`` is ``None`` only an error message is shown.

    Args:
        state: session object. This function reads ``task``,
            ``trained_model``, ``X_before_preprocess``,
            ``y_before_preprocess`` and ``ignore_columns`` from it.
    """
    # Bind the PyCaret functions that match the current task.
    if state.task == "Regression":
        from pycaret.regression import predict_model, finalize_model, save_model
    elif state.task == "Classification":
        from pycaret.classification import predict_model, finalize_model, save_model
    else:
        from pycaret.clustering import predict_model, save_model
        # Nothing named ``finalize_model`` is imported for clustering. Bind an
        # explicit sentinel so the "finalize" checkbox cannot trigger an
        # UnboundLocalError further down.
        finalize_model = None

    def online_predict(model, input_df, target_type):
        """Make a prediction on online data.

        Args:
            model (object): a trained model
            input_df (pd.DataFrame): the input dataframe for predictions
            target_type (str): the type of training target

        Returns:
            The first value of the ``Label`` column for regression and
            classification, otherwise the first value of ``Cluster``.
        """
        prediction_df = predict_model(model, data=input_df)
        if target_type in ('Regression', 'Classification'):
            column = 'Label'
        else:
            column = 'Cluster'
        # Positional access: a label lookup with ``[0]`` would fail whenever
        # the frame's index does not contain the label 0.
        return prediction_df[column].iloc[0]

    if state.trained_model is None:
        st.error("Please Train a Model first!")
        return

    st.header("Make a Prediction on Given Input or Upload a File.")
    add_selectbox = st.sidebar.selectbox(
        "How would you like to predict?",
        ("Online", "Batch", "SaveModel")
    )
    X_before_preprocess = state.X_before_preprocess
    target_name = state.y_before_preprocess
    ignore_columns = state.ignore_columns
    trained_model = state.trained_model

    if add_selectbox == "Online":
        with st.spinner("Predicting ..."):
            input_df = retrieve_train_element(
                X_before_preprocess, target_name, ignore_columns, state.task)
            # The message is rendered only after the button was pressed, so
            # an empty prediction is never displayed.
            if st.button("Predict"):
                output = str(online_predict(trained_model, input_df, state.task))
                st.success(f'The Prediction is **{output}**')

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader('Upload csv file for prediciton', type=["csv", "xlsx"])
        if file_upload is not None:
            # Use the real suffix: splitting on the first dot picks the wrong
            # part for names such as "sales.2021.csv" and raises IndexError
            # for names without any dot.
            upload_suffix = Path(file_upload.name).suffix.lower()
            if upload_suffix == ".csv":
                data = pd.read_csv(file_upload)
            else:
                data = pd.read_excel(file_upload)
            predictions = predict_model(trained_model, data=data)
            st.write(predictions)

            is_download = st.checkbox(
                "Do You Want to Download the Prediction File?", value=False)
            if is_download:
                file_extension = st.selectbox(
                    "Choose Csv or Excel File to Download", options=[".csv", ".xlsx"])
                file_name = st.text_input("File Name", value="prediction", key=1)
                if file_name:
                    href = download_button(predictions, file_name, "Download", file_extension)
                    st.markdown(href, unsafe_allow_html=True)
                else:
                    st.error("File Name cannot be empty!")

    if add_selectbox == "SaveModel":
        is_download = st.checkbox("Do You Want to Download the Model?", value=False)
        if is_download:
            file_name = st.text_input("File Name", value="", key=2)
            is_finalize = st.checkbox(
                "Do You Want to Finalize the Model (not for clustering)?", value=False)
            if file_name:
                if is_finalize and finalize_model is not None:
                    finalized_model = finalize_model(trained_model)
                    _, name = save_model(finalized_model, file_name)
                else:
                    if is_finalize:
                        # Clustering task: nothing to finalize with, so fall
                        # back to the trained model instead of crashing.
                        st.warning("Finalizing is not available for clustering; "
                                   "the trained model is saved as it is.")
                    _, name = save_model(trained_model, file_name)

                # Reload the file that was just written so the download
                # button receives the saved object.
                with open(name, "rb") as f:
                    loaded_model = joblib.load(f)
                href = download_button(
                    loaded_model, file_name, "Download", ".pkl", pickle_it=True)
                st.markdown(href, unsafe_allow_html=True)

                remove_cache = st.checkbox("Remove the Cache?", value=False)
                if remove_cache:
                    # NOTE: this removes every *.pkl file in the current
                    # working directory, not only the one saved above.
                    for cached_file in list(Path(".").glob("*.pkl")):
                        cached_file.unlink()
                    # Glob again: re-reading the already consumed generator
                    # would always look empty and report success.
                    if not any(Path(".").glob("*.pkl")):
                        st.success("Delete the Cache File from Local Filesystem!")
                        st.balloons()
            else:
                st.error("Please Give a File Name first!")
from pycaret.regression import setup, create_model, tune_model, save_model
import pandas as pd

# Load the insurance dataset and print its first rows.
data = pd.read_csv('C:/tmp/insurance.csv', sep=',')
print(data.head())

# Options forwarded to PyCaret's setup(); 'charges' is the target column and
# 'age' / 'bmi' are the numeric features to be binned.
setup_options = {
    'target': 'charges',
    'session_id': 123,
    'normalize': True,
    'polynomial_features': True,
    'trigonometry_features': True,
    'feature_interaction': True,
    'bin_numeric_features': ['age', 'bmi'],
}
r2 = setup(data, **setup_options)

# Create the 'lr' model, tune it, and save the tuned model.
lr = create_model('lr')
tuned_lr = tune_model(lr)
# NOTE(review): './models' is assumed to exist already - confirm, since
# nothing in this script creates it.
save_model(tuned_lr, model_name='./models/lr_deployment_20210521')
def regression_model(*, y_col, training_set, normalize, test_size, folds,
                     metric, model_name, testing_set, imbalanced, seed,
                     include_models, normalize_method):
    """
    Build a regression model for prediction.

    The function sets up a PyCaret experiment, compares the candidate models,
    tunes the best one, finalizes it, saves three plots and the model, and -
    when an external testing set is supplied - evaluates the saved model on
    it through ``test_regressor``.

    Parameters
    ----------
    y_col : str
        the name of the target column.
    training_set : pd.DataFrame
        DataFrame containing the training data.
    normalize : bool
        if True the dataset will be normalized before training.
    test_size : float
        Between [0.0-1.0]. The size of the split for test within the
        training set; ``1 - test_size`` is passed on as ``train_size``.
    folds : int
        number of folds for cross validation.
    metric : str
        the metric used for evaluating the best model. A falsy value
        (``None`` or ``''``) selects ``'RMSE'``.
    model_name : str
        the name to save the model; also used as the prefix of every
        ``.tsv`` report written by this function.
    testing_set : pd.DataFrame or None
        the external dataset for evaluating the best model. ``None`` or a
        frame without rows skips the external evaluation.
    imbalanced
        accepted for interface compatibility; not used by this function.
    seed : int
        random number to initialize the process (passed as ``session_id``).
    include_models : List
        a list of models to be included in the process.
    normalize_method : str
        The method used for normalizing the data.

    Returns
    -------
    Final regression model
    """
    if not metric:
        metric = 'RMSE'

    # The return value of setup() is not needed; the experiment is used
    # through the module-level ``pyreg`` functions below.
    pyreg.setup(target=y_col,
                data=training_set,
                normalize=normalize,
                normalize_method=normalize_method,
                train_size=1 - test_size,
                fold=folds,
                silent=True,
                session_id=seed)

    best_model = pyreg.compare_models(sort=metric, include=include_models)
    # Store what pull() returns right after the comparison.
    pyreg.pull().to_csv(model_name + '_compare_models.tsv',
                        sep='\t', index=False)

    reg_model = pyreg.create_model(best_model)
    reg_tuned_model = pyreg.tune_model(reg_model, optimize=metric)
    # Store what pull() returns right after tuning.
    pyreg.pull().to_csv(model_name + '_tuned_model.tsv',
                        sep='\t', index=False)

    final_model = pyreg.finalize_model(reg_tuned_model)

    # Default plot plus the 'feature' and 'error' plots, each with save=True.
    pyreg.plot_model(final_model, save=True)
    pyreg.plot_model(final_model, plot='feature', save=True)
    pyreg.plot_model(final_model, plot='error', save=True)

    pyreg.save_model(final_model, model_name)

    # Guard against None as well as an empty frame so callers without an
    # external testing set do not hit an AttributeError.
    if testing_set is not None and len(testing_set.index) != 0:
        unseen_predictions = test_regressor(
            model_path=model_name + '.pkl',
            x_set=testing_set.drop(columns=[y_col]),
            y_col=testing_set[y_col],
            output=model_name)
        unseen_predictions.to_csv(
            model_name + '_external_testing_results.tsv',
            sep='\t', index=True)

    return final_model