def run_demo():
    """Run the PyCaret classification walkthrough on the 'credit' dataset.

    Loads the data, withholds 5% of the rows as "unseen" data, initialises the
    PyCaret experiment on the remaining 95% and compares all models.
    Returns nothing; results are printed / displayed by the called functions.
    """
    # Note: this required adding the line "from IPython.display import display" to the file
    # C:\ProgramData\Anaconda3\Lib\site-packages\pycaret\datasets.py
    dataset = get_data('credit')

    # Check the shape of data (printed, because a bare expression inside a
    # function has no visible effect).
    print(dataset.shape)

    # In order to demonstrate the predict_model() function on unseen data, a sample of 1200 records has been withheld
    # from the original dataset to be used for predictions. This should not be confused with a train/test split as this
    # particular split is performed to simulate a real life scenario. Another way to think about this is that these
    # 1200 records are not available at the time when the machine learning experiment was performed.
    data = dataset.sample(frac=0.95, random_state=786)
    # Drop by the sampled row labels BEFORE resetting the index. Resetting first
    # would turn the labels into 0..n-1, so drop() would remove the first n rows
    # of the dataset instead of the sampled ones and the two frames would overlap.
    data_unseen = dataset.drop(data.index).reset_index(drop=True)
    data = data.reset_index(drop=True)
    print('Data for Modeling: ' + str(data.shape))
    print('Unseen Data For Predictions: ' + str(data_unseen.shape))

    # The setup() function initializes the environment in pycaret and creates the transformation pipeline to prepare the
    # data for modeling and deployment. setup() must be called before executing any other function in pycaret. It takes
    # two mandatory parameters: a pandas dataframe and the name of the target column.
    exp_clf101 = setup(data=data, target='default', session_id=123)

    # Comparing all models to evaluate performance is the recommended starting point for modeling once the setup is
    # completed (unless you exactly know what kind of model you need, which is often not the case). This function trains
    # all models in the model library and scores them using stratified cross validation for metric evaluation. The
    # output prints a score grid that shows average Accuracy, AUC, Recall, Precision, F1 and Kappa across the folds
    # (10 by default) of all the available models in the model library.
    compare_models()
def run():
    """Train a fixed list of regressors on the 'insurance' data with experiment logging on.

    Points MLflow at a local SQLite tracking store, initialises the PyCaret
    experiment and calls create_model() once per model id.
    """
    insurance_df = get_data('insurance')
    mlflow.set_tracking_uri("sqlite:///mlruns.db")
    experiment = setup(
        insurance_df,
        target='charges',
        session_id=123,
        silent=True,
        log_experiment=True,
        experiment_name='insurance_demo4',
        log_plots=True,
    )
    # Build the models one by one, in the same order as the id list.
    trained_models = []
    for model_id in ['lr', 'dt', 'lightgbm', 'rf']:
        trained_models.append(create_model(model_id))
def get_pycaret_data():
    """Return the modeling frame and the first five held-out rows of the 'credit' data.

    Returns:
        A tuple ``(data, unseen_head)``: a 95% random sample (random_state=786)
        with a fresh index, and the first five of the remaining rows.
    """
    credit = get_data("credit")
    modeling = credit.sample(frac=0.95, random_state=786)
    # The held-out rows are selected using the sampled labels, so the index of
    # the sample is only reset after the drop.
    held_out = credit.drop(modeling.index).reset_index(drop=True)
    modeling = modeling.reset_index(drop=True)
    return modeling, held_out.iloc[:5]
def test():
    """Smoke test: create and tune three regressors on the 'boston' data.

    The test only checks that the calls complete; the final assertion is
    trivially true.
    """
    from pycaret.datasets import get_data

    boston = get_data("boston")

    from pycaret.regression import setup, create_model, tune_model

    experiment = setup(boston, target="medv", silent=True, html=False, session_id=123)

    # Same estimators and same create -> tune order as before, expressed as a loop.
    for estimator_id in ("gbr", "xgboost", "lightgbm"):
        base_model = create_model(estimator_id)
        tuned_model = tune_model(base_model)

    assert 1 == 1
#%%
from pycaret.datasets import get_data

# %%
# Demo dataset fetched through PyCaret.
data = get_data('kiva')

#%%
import pandas as pd

# Local Excel corpus (path is relative to the working directory).
df_corpus = pd.read_excel("../../storage/ClientesBotonPagos__202006.xlsx")

# %%
# Kaggle train / test / sample-submission files.
df_kaggle_train = pd.read_csv("../../storage/kaggle_train.csv")
df_kaggle_test = pd.read_csv("../../storage/kaggle_test.csv")
df_kaggle_sample = pd.read_csv("../../storage/kaggle_samplesub.csv")

# %%
df_corpus.describe()

# %%
df_corpus.head()

# %%
df_corpus.NOMBRE_COMERCIO.head()

# %%
from nltk.corpus import stopwords

stop_words = stopwords.words('spanish')

from stop_words import get_stop_words

# NOTE: this rebinds stop_words, replacing the NLTK list assigned just above.
stop_words = get_stop_words('spanish')

# %%
stop_words

# %%
from pycaret.nlp import *
from pycaret.datasets import get_data

# Fetch the 'boston' demo dataset through PyCaret's dataset helper.
boston_data = get_data(dataset='boston')
#!/usr/bin/env python3

# Created by Paul A. Gureghian in June 2020.
#
# This Python program demos the PyCaret ML library.
#
# Start the program.
#
# Import PyCaret.
#
from pycaret.datasets import get_data

# Fetch the 'diabetes' demo dataset through PyCaret's dataset helper.
diabetes = get_data(dataset='diabetes')
from pycaret.utils import version

version()

from pycaret.datasets import get_data

data = get_data('insurance')

from pycaret.regression import *

# Experiment 1: default preprocessing, with experiment logging enabled.
reg1 = setup(
    data,
    target='charges',
    session_id=123,
    log_experiment=True,
    experiment_name='insurance1',
)

# Experiment 2: same data and seed, with extra feature engineering switched on.
# (The wildcard import above is already in effect, so it is not repeated.)
r2 = setup(
    data,
    target='charges',
    session_id=123,
    normalize=True,
    polynomial_features=True,
    trigonometry_features=True,
    feature_interaction=True,
    bin_numeric_features=['age', 'bmi'],
)

# Compare models with 5 folds, then build the 'lr' model and save it.
best_model = compare_models(fold=5)
lr = create_model('lr')
save_model(lr, model_name='deploy_model')
# -*- coding: utf-8 -*-
"""UC1_CreditCardDefault.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1aQFNB8eIe6HS430IbSx8LdoMCI7KcFmx
"""

# Notebook shell magic; it is not valid Python in a .py file, so it is kept as
# a comment. Install from a terminal instead:
#   pip install pycaret
# !pip install pycaret

from pycaret.utils import enable_colab

enable_colab()

from pycaret.datasets import get_data

credit_data = get_data('credit')
credit_data.shape

# Withhold 5% of the rows as "unseen" data. The sampled row labels must be
# used for the drop BEFORE the index is reset; resetting first would make
# drop() remove the first n rows of the dataset instead of the sampled ones,
# so the unseen rows would also be part of the modeling data.
data = credit_data.sample(frac=0.95, random_state=786)
data_unseen = credit_data.drop(data.index).reset_index(drop=True)
data = data.reset_index(drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

from pycaret.classification import *

# Initialise the classification experiment on the modeling data.
exp = setup(data = data,target='default',session_id=123)

compare_models()

catboost = create_model('catboost')
# Getting input parameters from the command line:
#   argv[1] = dataset name passed to get_data(), argv[2] = target column.
import sys

# Fail with a usage message instead of a bare IndexError when an argument is missing.
if len(sys.argv) < 3:
    sys.exit('usage: {} <dataset> <target>'.format(sys.argv[0]))

dataset = sys.argv[1]
target = sys.argv[2]
exp_name = str(dataset) + '_exp'

# Import the dataset named on the command line.
from pycaret.datasets import get_data

data = get_data(dataset)

# Initialize setup.
from pycaret.classification import setup, compare_models, blend_models, tune_model, save_model, deploy_model, automl

clf1 = setup(data, target=target, silent=True, html=False, log_experiment=True, experiment_name=exp_name)

# Compare models and select the top 5 ('catboost' is excluded from the comparison).
top5 = compare_models(n_select=5, blacklist=['catboost'])

# Blend the top 5 models.
blender = blend_models(estimator_list=top5)

# Tune the best model (first entry of the selection).
tuned_best_model = tune_model(top5[0])
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 16 00:48:48 2021

@author: Rashmi Thekkath
"""

from pycaret.datasets import get_data

data = get_data("insurance")

from pycaret.regression import *

# Initialise the regression experiment with 'charges' as the target.
s = setup(data, target='charges', session_id=123)

# Linear regression - 10 fold cross validation (per the original note).
lr = create_model('lr')
plot_model(lr)

# Save the linear regression model from this notebook as a file in the
# Insurance_cloud folder (saved as a pkl file, per the original note).
save_model(
    lr,
    model_name='C:/Users/rashm/Insurance_cloud/deployment_28042020',
)
from pycaret.datasets import get_data
from pycaret.classification import *

data = get_data('titanic')

# Classification experiment on 'Survived' with experiment and data logging on.
clf1 = setup(
    data,
    preprocess=True,
    target='Survived',
    session_id=124,
    log_experiment=True,
    experiment_name='tt6',
    log_data=True,
    silent=True,
)

# Build the 'lr' model and save it under the name "titanic_lr".
lr = create_model('lr')
save_model(lr, model_name="titanic_lr")
from pycaret.datasets import get_data
from pycaret.clustering import setup, create_model, assign_model

# Load the 'jewellery' demo dataset.
jewellery = get_data(dataset='jewellery')

# Initialise the clustering experiment on the full frame.
clu = setup(data=jewellery)

# Build a 'kmeans' model and pass it to assign_model().
kmeans = create_model('kmeans')
kmeans_results = assign_model(kmeans)
#Topic: Py Caret
#-----------------------------
#libraries
#https://pycaret.org/
#PyCaret is an open source, low-code machine learning library in Python that allows you to go from preparing your data to deploying your model within minutes in your choice of notebook environment.

# Installation commands are shell commands, not Python statements, so they are
# kept as comments. Run them from a terminal:
#   pip install pycaret
#   pip install SQLAlchemy

import pycaret
import pandas as pd

# Loading data from pycaret
#datasets - https://pycaret.org/get-data/
from pycaret.datasets import get_data
data = get_data('juice')

#env for modeling
#https://pycaret.org/setup/
# NOTE(review): these wildcard imports are listed as a catalogue of modules.
# They presumably export overlapping names (e.g. setup), so the last import
# would win — import only the module you actually need in real code.
#Classification
from pycaret.classification import *
#Regression
from pycaret.regression import *
#Clustering
from pycaret.clustering import *
#Anomaly Detection
from pycaret.anomaly import *
#Natural Language Processing
from pycaret.nlp import *
#Association Rule Mining
from pycaret.arules import *

#classification
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# Importing module and initializing setup
from pycaret.utils import version

version()

# In[3]:

from pycaret.datasets import get_data

# In[4]:

# Internet connection is required
# 'index' is requested here, presumably the catalogue of available datasets.
dataSets = get_data('index')
dataSets

# In[5]:

# Internet connection is required
# Per the original note this is a regression dataset. NOTE(review): that note
# mentioned a 'medv' column, which does not obviously belong to 'bike' — verify.
bike_df = get_data("bike")

# In[6]:

print(bike_df.shape)
import pandas as pd

# Option 1: read the data from a CSV file (the path is a placeholder).
data = pd.read_csv('c:/path_to_data/file.csv')

from pycaret.datasets import get_data

# Option 2: fetch the 'juice' demo dataset; this rebinds `data`.
data = get_data(dataset='juice')
def start():
    """Load the 'diabetes' demo data, print it, set up an experiment and compare models."""
    frame: pd.DataFrame = get_data('diabetes')
    print(frame)
    # The experiment handle is not used afterwards; setup() is called for its
    # effect of initialising the environment before compare_models().
    setup(frame, target='Class variable')
    compare_models()
## My ARGS
# Pycaret Args
_input_file = None
_demo_dataset = 'boston'
_pandas_profiling = True  # Default is True which gives detailed
_target = 'medv'  # default target class for Boston data
_silent_preproccessing = True

import logging

# import the dataset from pycaret repository
import pandas as pd

# Decide which demo dataset (if any) to load. Precedence is unchanged:
#   1. no input file and no demo dataset  -> fall back to 'boston'
#   2. demo dataset is a supported name   -> load it (even if an input file is set)
#   3. anything else                      -> read the CSV given by _input_file
if _input_file is None and _demo_dataset is None:
    _dataset_name = 'boston'
elif _demo_dataset in ['diabetes', 'boston']:
    _dataset_name = _demo_dataset
else:
    _dataset_name = None

if _dataset_name is not None:
    from pycaret.datasets import get_data

    # The profiling flag now follows the _pandas_profiling setting (True by
    # default, which matches the previously hard-coded value).
    input_data = get_data(_dataset_name, profile=_pandas_profiling)

    # Withhold 10% of the rows as "unseen" data. Drop by the sampled row labels
    # BEFORE resetting the index; resetting first would make drop() remove the
    # first n rows instead of the sampled ones, so both frames would overlap.
    data = input_data.sample(frac=0.9, random_state=786)
    data_unseen = input_data.drop(data.index).reset_index(drop=True)
    data = data.reset_index(drop=True)
    print('Data for Modeling: ' + str(data.shape))
    print('Unseen Data For Predictions: ' + str(data_unseen.shape))
else:
    input_data = pd.read_csv(_input_file)
# LOAD DATA
from pycaret.datasets import get_data

data = get_data('datasets_insurance')
data

# SHOW EXPERIMENT 1
from pycaret.regression import *

s = setup(data, target='charges', session_id=123)

# CREATE THE 'lr' MODEL
# NOTE(review): the original comment called this logistic regression; it is
# created via pycaret.regression, so it is presumably linear regression — confirm.
lr = create_model('lr')

# PLOT LR MODEL
plot_model(lr)

# EXPLORE EXPERIMENT 2
s2 = setup(
    data,
    target='charges',
    session_id=123,
    normalize=True,
    polynomial_features=True,
    trigonometry_features=True,
    feature_interaction=True,
    bin_numeric_features=['age', 'bmi'],
)

# SHOW COLUMNS IN EXPERIMENT 2
s2[0].columns

# CREATE EXPERIMENT 2 MODEL
lr = create_model('lr')

# PLOT THE MODEL
plot_model(lr)

# save the model
# Databricks notebook source

# COMMAND ----------

from pycaret.classification import *

# COMMAND ----------

from pycaret.datasets import get_data
dataset = get_data('credit')

# COMMAND ----------

# Withhold 5% of the rows as "unseen" data. Drop by the sampled row labels
# BEFORE resetting the index; resetting first would make drop() remove the
# first n rows of the dataset instead of the sampled ones, so the unseen rows
# would also appear in the modeling data.
data = dataset.sample(frac=0.95, random_state=786)
data_unseen = dataset.drop(data.index).reset_index(drop=True)
data = data.reset_index(drop=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

# COMMAND ----------

# Initialise the classification experiment on the modeling data.
exp_clf101 = setup(data = data, target = 'default', session_id=123)

# COMMAND ----------

rf = create_model('rf')

# COMMAND ----------

tuned_rf = tune_model(rf)
# Making and training the Linear Regression model on the dataset = 'insurance'
from pycaret.datasets import get_data

dataset = get_data('insurance')

from pycaret.regression import *

# Experiment 1 using simple model creation without any feature scaling
# s1 = setup(dataset, target = 'charges', session_id = 123)
# lr = create_model('lr')
# plot_model(lr)

# Experiment 2 adding some additional parameters
s2 = setup(
    dataset,
    target='charges',
    session_id=123,
    normalize=True,
    polynomial_features=True,
    trigonometry_features=True,
    feature_interaction=True,
    bin_numeric_features=['age', 'bmi'],
)

# Build, plot and save the 'lr' model from experiment 2.
lr = create_model('lr')
plot_model(lr)
save_model(lr, 'deployment_30052020')

# import requests
# url = 'https://pycaret-insurance.herokuapp.com/predict_api'
# pred = requests.post(url,json={'age':55, 'sex':'male', 'bmi':59, 'children':1, 'smoker':'male', 'region':'northwest'})
# print(pred.json())
# coding: utf-8

# BELOW IS A PREDICTIVE MODEL USING PYCARET AND BOSTON HOUSING DATA

# In[1]:

import pycaret
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pycaret.datasets import get_data

# In[2]:

df = get_data('boston')

# **Data Preprocessing
#
# This data is already cleaned from Kaggle.
#

# In[3]:

df.shape

# In[4]:

# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

# the above shows that the datasets are complete and no
# -*- coding: utf-8 -*-
"""Pycaret_Binary_Classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Gsukmin5VPKFwpVav7wix6JQKb9nUAdf
"""

# Installing pycaret
# Notebook shell magic; it is not valid Python in a .py file, so it is kept as
# a comment. Install from a terminal instead:
#   pip install pycaret
# !pip install pycaret

# Getting the Data
from pycaret.datasets import get_data
dataset = get_data('credit')

#check the shape of data
dataset.shape

#a sample of 1200 records has been withheld from the original dataset to be used for predictions
# Drop by the sampled row labels BEFORE resetting the index; resetting first
# would make drop() remove the first n rows of the dataset instead of the
# sampled ones, so the withheld rows would also appear in the modeling data.
data = dataset.sample(frac=0.95, random_state=786)
data_unseen = dataset.drop(data.index).reset_index(drop=True)
data = data.reset_index(drop=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

"""# Setting up Environment in PyCaret"""

from pycaret.classification import *

exp_clf101 = setup(data = data, target = 'default', session_id=123)
'''
Author: Christian O'Leary
Email: [email protected]
'''

from emmv import emmv_scores

# Adapted from https://pycaret.org/setup/

# Importing dataset
from pycaret.datasets import get_data
anomalies = get_data('anomaly')

# Importing module and initializing setup
from pycaret.anomaly import *
anomaly_setup = setup(data=anomalies)

# create a model
model = create_model('iforest')
results = assign_model(model)

# Get EM & MV scores
test_scores = emmv_scores(model, anomalies)
# Label fixed from "score;" to "score:" so both output lines use the same format.
print('Excess Mass score:', test_scores['em'])
print('Mass Volume score:', test_scores['mv'])
# dataset and target
dataset = 'juice'
target = 'Purchase'

# checking version
from pycaret.utils import version

version()

import time

# Start timestamp for the run.
t0 = time.time()

# loading dataset
from pycaret.datasets import get_data

data = get_data(dataset, verbose=False)

# init classification experiment (setup is imported from pycaret.classification)
from pycaret.classification import setup

exp1 = setup(data, target=target, silent=True, html=False, verbose=False)

# RECIPE #1 - SELECT TOP 5 MODELS
from pycaret.classification import compare_models

top5 = compare_models(
    n_select=5,
    whitelist=['dt', 'lr', 'rf', 'lightgbm', 'xgboost'],
)

# RECIPE #2 - TUNE TOP 5 MODELS
from pycaret.classification import tune_model

# tune_model is applied to each selected model, in order.
tuned_top5 = list(map(tune_model, top5))
print(len(tuned_top5))

# RECIPE #3
from pycaret.classification import blend_models