def _store_regresser(self, regression_model):
    """Save a regression model into the task's output directory.

    The destination is ``<self.task.output_dir>/regressor_<self.task.pycaret_model>``
    and is handed to ``regression.save_model`` together with the model.

    Args:
        regression_model: the model object to store.
    """
    task = self.task
    destination = os.path.join(
        task.output_dir,
        f"regressor_{task.pycaret_model}",
    )
    regression.save_model(regression_model, destination)
示例#2
0
def write(state):
    """Render the Streamlit prediction page for a previously trained model.

    The sidebar selects one of three modes:

    * ``Online``    - predict the single row built by ``retrieve_train_element``.
    * ``Batch``     - predict an uploaded CSV/Excel file and optionally offer
      the predictions as a download.
    * ``SaveModel`` - save the (optionally finalized) model, offer it as a
      download and optionally delete ``*.pkl`` files from the working
      directory.

    If no model has been trained yet, only an error message is shown.

    Args:
        state: session state object. This function reads ``task``,
            ``trained_model``, ``X_before_preprocess``,
            ``y_before_preprocess`` and ``ignore_columns`` from it.
    """
    # Pick the PyCaret helpers that match the task type.
    if state.task == "Regression":
        from pycaret.regression import predict_model, finalize_model, save_model
    elif state.task == "Classification":
        from pycaret.classification import predict_model, finalize_model, save_model
    else:
        from pycaret.clustering import predict_model, save_model
        # Nothing named finalize_model is imported on this branch. Use None as
        # a marker so the SaveModel page can skip finalizing instead of
        # failing with UnboundLocalError.
        finalize_model = None

    def online_predict(model, input_df, target_type):
        """Make a prediction on online (single-row) data.

        Args:
            model (object): a trained model
            input_df (pd.DataFrame): the input dataframe for predictions
            target_type (str): the type of training target

        Returns:
            The value in the first row of the prediction column
            (``'Label'`` for regression/classification, ``'Cluster'``
            otherwise).
        """
        prediction_df = predict_model(model, data=input_df)
        if target_type in ('Regression', 'Classification'):
            column = 'Label'
        else:
            column = 'Cluster'
        # Positional access: take the first row whatever its index label is.
        return prediction_df[column].iloc[0]

    if state.trained_model is None:
        st.error("Please Train a Model first!")
        return

    st.header("Make a Prediction on Given Input or Upload a File.")

    add_selectbox = st.sidebar.selectbox(
        "How would you like to predict?",
        ("Online", "Batch", "SaveModel")
    )

    X_before_preprocess = state.X_before_preprocess
    target_name = state.y_before_preprocess
    ignore_columns = state.ignore_columns
    trained_model = state.trained_model

    if add_selectbox == "Online":
        with st.spinner("Predicting ..."):
            input_df = retrieve_train_element(
                X_before_preprocess, target_name, ignore_columns, state.task)
            if st.button("Predict"):
                output = str(online_predict(trained_model, input_df, state.task))
                st.success(f'The Prediction is **{output}**')

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader('Upload csv file for prediciton', type=["csv", "xlsx"])
        if file_upload is not None:
            # Use the real (last) suffix, case-insensitively, so names such as
            # "my.data.csv" or "DATA.CSV" are still read as CSV.
            upload_suffix = Path(file_upload.name).suffix.lower()
            if upload_suffix == ".csv":
                data = pd.read_csv(file_upload)
            else:
                data = pd.read_excel(file_upload)
            predictions = predict_model(trained_model, data=data)
            st.write(predictions)

            is_download = st.checkbox("Do You Want to Download the Prediction File?", value=False)
            if is_download:
                file_extension = st.selectbox("Choose Csv or Excel File to Download", options=[".csv", ".xlsx"])
                file_name = st.text_input("File Name", value="prediction", key=1)
                if file_name:
                    href = download_button(predictions, file_name, "Download", file_extension)
                    st.markdown(href, unsafe_allow_html=True)
                else:
                    st.error("File Name cannot be empty!")

    if add_selectbox == "SaveModel":
        is_download = st.checkbox("Do You Want to Download the Model?", value=False)
        if is_download:
            file_name = st.text_input("File Name", value="", key=2)
            is_finalize = st.checkbox("Do You Want to Finalize the Model (not for clustering)?", value=False)
            if file_name:
                # NOTE(review): file_name is raw user input and is used as the
                # save path - confirm that is acceptable for the deployment.
                model_to_save = trained_model
                if is_finalize:
                    if finalize_model is None:
                        st.warning("Finalizing is not available for clustering; "
                                   "saving the trained model as is.")
                    else:
                        model_to_save = finalize_model(trained_model)
                # The second element returned by save_model is used below as
                # the path of the file that was written.
                _, name = save_model(model_to_save, file_name)
                with open(name, "rb") as f:
                    e = joblib.load(f)
                href = download_button(e, file_name, "Download", ".pkl", pickle_it=True)
                st.markdown(href, unsafe_allow_html=True)

                remove_cache = st.checkbox("Remove the Cache?", value=False)
                if remove_cache:
                    # glob() returns a one-shot generator: materialize it for
                    # the deletion pass, then glob again to verify that no
                    # .pkl file is left before reporting success.
                    for cached_file in list(Path(".").glob("*.pkl")):
                        cached_file.unlink()
                    if not list(Path(".").glob("*.pkl")):
                        st.success("Delete the Cache File from Local Filesystem!")
                        st.balloons()
            else:
                st.error("Please Give a File Name first!")
示例#3
0
from pycaret.regression import setup, create_model, tune_model, save_model
import pandas as pd

# Load the insurance dataset and preview its first rows.
data = pd.read_csv('C:/tmp/insurance.csv', delimiter=',')
print(data.head())

# Experiment options forwarded to setup(): target column, fixed session id
# and the preprocessing switches.
setup_options = {
    'target': 'charges',
    'session_id': 123,
    'normalize': True,
    'polynomial_features': True,
    'trigonometry_features': True,
    'feature_interaction': True,
    'bin_numeric_features': ['age', 'bmi'],
}
r2 = setup(data, **setup_options)

# Create the 'lr' model, tune it, and save the tuned model for deployment.
lr = create_model('lr')
tuned_lr = tune_model(lr)
save_model(tuned_lr, model_name='./models/lr_deployment_20210521')
示例#4
0
def regression_model(*, y_col, training_set, normalize, test_size, folds,
                     metric, model_name, testing_set, imbalanced, seed,
                     include_models, normalize_method):
    """
    Build, tune, finalize and save a regression model for prediction.

    Besides returning the final model, this function writes several files
    whose names start with ``model_name``: ``_compare_models.tsv``,
    ``_tuned_model.tsv``, the saved model itself and, when an external
    testing set is given, ``_external_testing_results.tsv``. Three plots are
    also requested with ``save=True``.

    NOTE(review): ``pyreg`` is presumably ``pycaret.regression`` - confirm
    against the file's imports.

    Parameters
    ----------
    y_col : str
        the name of the target column.
    training_set : pd.DataFrame
        DataFrame containing the training data.
    normalize : bool
        if True the dataset will be normalized before training.
    test_size : float
        Between [0.0-1.0]. The size of the split for test within the
        training set; ``1 - test_size`` is passed on as ``train_size``.
    folds : int
        number of folds for cross validation.
    metric : str
        the metric used for evaluating the best model. A falsy value
        (``None`` or ``''``) falls back to ``'RMSE'``.
    model_name : str
        the name to save the model; also the prefix of every output file.
    testing_set : pd.DataFrame or None
        the external dataset for evaluating the best model. ``None`` or a
        DataFrame without rows skips the external evaluation.
    imbalanced
        accepted but not referenced anywhere in this function's body.
    seed : int
        random number to initialize the process.
    include_models : List
        a list of models to be included in the process.
    normalize_method : str
        The method used for normalizing the data.

    Returns
    -------
    Final regression model, i.e. the object returned by
    ``pyreg.finalize_model`` for the tuned best model.

    """
    if not metric:
        metric = 'RMSE'
    # The return value of setup() is not used below, so it is not kept.
    pyreg.setup(target=y_col,
                data=training_set,
                normalize=normalize,
                normalize_method=normalize_method,
                train_size=1 - test_size,
                fold=folds,
                silent=True,
                session_id=seed)

    # Model selection: the table pulled right after comparing is persisted.
    best_model = pyreg.compare_models(sort=metric, include=include_models)
    pyreg.pull().to_csv(model_name + '_compare_models.tsv',
                        sep='\t',
                        index=False)

    # Re-create and tune the selected model; persist the table pulled after
    # tuning.
    reg_model = pyreg.create_model(best_model)
    reg_tuned_model = pyreg.tune_model(reg_model, optimize=metric)
    pyreg.pull().to_csv(model_name + '_tuned_model.tsv', sep='\t', index=False)

    final_model = pyreg.finalize_model(reg_tuned_model)
    pyreg.plot_model(final_model, save=True)
    pyreg.plot_model(final_model, plot='feature', save=True)
    pyreg.plot_model(final_model, plot='error', save=True)
    pyreg.save_model(final_model, model_name)

    # Evaluate on the external set only when one with at least one row was
    # supplied; None is treated the same as an empty set.
    if testing_set is not None and len(testing_set.index) != 0:
        unseen_predictions = test_regressor(
            model_path=model_name + '.pkl',
            x_set=testing_set.drop(columns=[y_col]),
            y_col=testing_set[y_col],
            output=model_name)
        unseen_predictions.to_csv(model_name + '_external_testing_results.tsv',
                                  sep='\t',
                                  index=True)
    return final_model