Python get_data示例，pycaret.datasets.get_data Python示例

示例#1

0

显示文件

文件： playground.py 项目： turczytj/My_Machine_Learning_Playground

def run_demo():
    """Run the PyCaret classification walkthrough on the 'credit' dataset.

    Loads the dataset, withholds 5% of its rows as "unseen" data, initialises a
    PyCaret experiment on the remaining 95% and compares the available models.

    Relies on ``get_data``, ``setup`` and ``compare_models`` being available at
    module level (their imports are not part of this snippet). Returns ``None``.
    """
    # Note: this required adding the line "from IPython.display import display" to the file
    #   C:\ProgramData\Anaconda3\Lib\site-packages\pycaret\datasets.py
    dataset = get_data('credit')

    # Check the shape of data (only displays something in an interactive session)
    dataset.shape

    # In order to demonstrate the predict_model() function on unseen data, 5% of the records (1200 according to the
    # original author's note) are withheld from the original dataset to be used for predictions. This should not be
    # confused with a train/test split as this particular split is performed to simulate a real life scenario. Another
    # way to think about this is that these records are not available at the time the experiment was performed.
    #
    # The sampled rows must be dropped by their ORIGINAL index labels. Resetting the index first (as the previous
    # version did) makes `data.index` 0..N-1, so `dataset.drop(data.index)` would remove the first N rows instead of
    # the sampled ones and the "unseen" data would overlap with the modeling data.
    sampled = dataset.sample(frac=0.95, random_state=786)
    data = sampled.reset_index(drop=True)
    data_unseen = dataset.drop(sampled.index).reset_index(drop=True)

    print('Data for Modeling: ' + str(data.shape))
    print('Unseen Data For Predictions: ' + str(data_unseen.shape))

    # The setup() function initializes the environment in pycaret and creates the transformation pipeline to prepare the
    # data for modeling and deployment. setup() must be called before executing any other function in pycaret. It takes
    # two mandatory parameters: a pandas dataframe and the name of the target column.
    setup(data=data, target='default', session_id=123)

    # Comparing all models to evaluate performance is the recommended starting point for modeling once the setup is
    # completed (unless you exactly know what kind of model you need, which is often not the case). This function trains
    # all models in the model library and scores them using stratified cross validation for metric evaluation. The
    # output prints a score grid that shows average Accuracy, AUC, Recall, Precision, F1 and Kappa across the folds
    # (10 by default) of all the available models in the model library.
    compare_models()

示例#2

0

显示文件

def run():
    """Set up a logged experiment on the 'insurance' dataset and create four models.

    Expects ``get_data``, ``mlflow``, ``setup`` and ``create_model`` to be in
    scope at module level. Returns ``None``.
    """
    insurance = get_data('insurance')
    # Point MLflow at the SQLite tracking URI before the experiment starts.
    mlflow.set_tracking_uri("sqlite:///mlruns.db")
    setup(
        insurance,
        target='charges',
        session_id=123,
        silent=True,
        log_experiment=True,
        experiment_name='insurance_demo4',
        log_plots=True,
    )
    # Create each model in turn; the created models are not used afterwards.
    for model_id in ('lr', 'dt', 'lightgbm', 'rf'):
        create_model(model_id)

示例#3

0

显示文件

def get_pycaret_data():
    """Split the 'credit' dataset into modeling data and a small unseen sample.

    Returns:
        A ``(data, data_unseen)`` tuple: a 95% random sample (``random_state=786``)
        with a fresh 0-based index, and the first five rows of the remaining
        records, also re-indexed.
    """
    credit = get_data("credit")
    modeling_rows = credit.sample(frac=0.95, random_state=786)
    # Remove the sampled rows by their original labels before any re-indexing.
    remaining_rows = credit.drop(modeling_rows.index)
    return (
        modeling_rows.reset_index(drop=True),
        remaining_rows.reset_index(drop=True)[:5],
    )

示例#4

0

显示文件

文件： test_overflow.py 项目： wanghuihuilorna/pycaret

def test():
    """Smoke test: create and tune three regressors on the 'boston' dataset.

    Passes as long as every PyCaret call completes without raising.
    """
    from pycaret.datasets import get_data

    boston = get_data("boston")

    from pycaret.regression import create_model, setup, tune_model

    setup(boston, target="medv", silent=True, html=False, session_id=123)
    # Same call sequence as before: create a model, tune it, move to the next.
    for estimator_id in ("gbr", "xgboost", "lightgbm"):
        tune_model(create_model(estimator_id))
    assert 1 == 1

示例#5

0

显示文件

文件： desafio.py 项目： JCRCS/multi-actividad

#%%
# Cell-style script ("# %%" markers separate interactive cells).

from pycaret.datasets import get_data
# %%
# Load the 'kiva' dataset via PyCaret.
data = get_data ('kiva')
#%%
import pandas as pd
# Read the Excel workbook from the relative storage directory.
df_corpus = pd.read_excel("../../storage/ClientesBotonPagos__202006.xlsx")
# %%
# Read the three Kaggle CSV files (train, test, sample submission).
df_kaggle_train = pd.read_csv("../../storage/kaggle_train.csv")

df_kaggle_test = pd.read_csv("../../storage/kaggle_test.csv")

df_kaggle_sample = pd.read_csv("../../storage/kaggle_samplesub.csv")
# %%

# Exploratory look at df_corpus; these bare expressions only display output in an interactive cell.
df_corpus.describe()
# %%
df_corpus.head()
# %%
df_corpus.NOMBRE_COMERCIO.head()
# %%
# Spanish stop words from NLTK ...
from nltk.corpus import stopwords
stop_words = stopwords.words('spanish')

# ... immediately replaced by the list from the `stop_words` package (the NLTK list above is discarded).
from stop_words import get_stop_words
stop_words = get_stop_words('spanish')
# %%
stop_words
# %%
from pycaret.nlp import *

示例#6

0

显示文件

# Load the 'boston' dataset through PyCaret's dataset helper.
from pycaret.datasets import get_data
boston_data = get_data('boston')

示例#7

0

显示文件

#!/usr/bin/env python3

# Created by Paul A. Gureghian in June 2020. #
# This Python program demos the PyCaret ML library. #

# Start the program. #

# Import PyCaret's dataset loader. #
from pycaret.datasets import get_data
# Load the 'diabetes' dataset. #
diabetes = get_data('diabetes')

示例#8

0

显示文件

# Check the PyCaret version.
from pycaret.utils import version
version()
# Load the 'insurance' dataset.
from pycaret.datasets import get_data
data = get_data('insurance')
# Experiment 1: plain setup with experiment logging enabled.
from pycaret.regression import *
reg1 = setup(data,
             target='charges',
             session_id=123,
             log_experiment=True,
             experiment_name='insurance1')
# Experiment 2: setup is called again on the same data with extra preprocessing options.
# NOTE(review): this star import repeats the one above and is redundant.
from pycaret.regression import *
r2 = setup(data,
           target='charges',
           session_id=123,
           normalize=True,
           polynomial_features=True,
           trigonometry_features=True,
           feature_interaction=True,
           bin_numeric_features=['age', 'bmi'])

# Compare models with fold=5.
# NOTE(review): presumably this runs against the most recent setup() call (r2) - confirm.
best_model = compare_models(fold=5)

# Create the model identified by 'lr'.
lr = create_model('lr')

# Save the model under the name 'deploy_model'.
save_model(lr, model_name='deploy_model')

示例#9

0

显示文件

# -*- coding: utf-8 -*-
"""UC1_CreditCardDefault.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1aQFNB8eIe6HS430IbSx8LdoMCI7KcFmx
"""

# The notebook ran the shell escape below. It is not valid Python in a plain .py file,
# so it is kept as a comment; install the package from a shell before running this script.
# !pip install pycaret

from pycaret.utils import enable_colab
enable_colab()

# Load the 'credit' dataset and look at its shape (only displays something interactively).
from pycaret.datasets import get_data
credit_data = get_data('credit')
credit_data.shape

# Withhold 5% of the rows as unseen data. The sampled rows are dropped by their ORIGINAL
# index labels; resetting the index before calling drop() (as the previous version did)
# removes the first N rows instead of the sampled ones, so the two sets would overlap.
sampled = credit_data.sample(frac=0.95, random_state=786)
data = sampled.reset_index(drop=True)
data_unseen = credit_data.drop(sampled.index).reset_index(drop=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

# Initialise the classification experiment on the modeling data.
from pycaret.classification import *
exp = setup(data = data,target='default',session_id=123)

compare_models()

catboost = create_model('catboost')

示例#10

0

显示文件

文件： pycaret-cli.py 项目： tusharkalecam/pycaret-demo-at

#getting input parameters from sys
# Two positional command-line arguments are read: <dataset> <target>.
import sys

dataset = sys.argv[1]  # dataset name passed to get_data()
target = sys.argv[2]  # target column passed to setup()
exp_name = str(dataset) + '_exp'  # experiment name derived from the dataset name

#import dataset using sys arg
from pycaret.datasets import get_data

data = get_data(dataset)

#initialize setup
# NOTE(review): save_model, deploy_model and automl are imported but not used in the visible part of the script.
from pycaret.classification import setup, compare_models, blend_models, tune_model, save_model, deploy_model, automl

clf1 = setup(data,
             target=target,
             silent=True,
             html=False,
             log_experiment=True,
             experiment_name=exp_name)

#compare models and select top5
# 'catboost' is passed in the blacklist argument.
top5 = compare_models(n_select=5, blacklist=['catboost'])

#blend top 5 models
blender = blend_models(estimator_list=top5)

#tune best model
# top5[0] is the first entry of the list returned by compare_models().
tuned_best_model = tune_model(top5[0])

示例#11

0

显示文件

文件： model.py 项目： rashmithekkath/Cloud_Deployment_Insurance

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 16 00:48:48 2021

@author: Rashmi Thekkath
"""

# Load the 'insurance' dataset.
from pycaret.datasets import get_data
data = get_data("insurance")

# Initialise a regression experiment with 'charges' as the target.
from pycaret.regression import *
s = setup(data, target='charges', session_id=123)

lr = create_model('lr')
plot_model(lr)
# linear regression - 10 fold cross validation

save_model(lr, model_name='C:/Users/rashm/Insurance_cloud/deployment_28042020')
# Saves the linear regression model from this Python notebook as a file in the Insurance_cloud folder.
# Saved as a pkl file.

示例#12

0

显示文件

from pycaret.datasets import get_data
from pycaret.classification import *


# Load the 'titanic' dataset and set up a logged classification experiment on 'Survived'.
data = get_data('titanic')
clf1 = setup(data, preprocess=True, target='Survived', session_id=124, log_experiment=True, experiment_name='tt6', log_data=True, silent=True)
# Create the model identified by 'lr' and save it under the name "titanic_lr".
lr = create_model('lr')
save_model(lr, model_name="titanic_lr")

示例#13

0

显示文件

文件： caret-assignmodel-clurstering.py 项目： devSeungGwan/pycaret-scratch

from pycaret.datasets import get_data
from pycaret.clustering import setup, create_model, assign_model

# Load the 'jewellery' dataset and initialise a clustering experiment (no target column is passed).
jewellery = get_data('jewellery')
clu = setup(data=jewellery)
# Create a 'kmeans' model, then call assign_model() on it.
# NOTE(review): presumably assign_model() returns the data with cluster labels attached - confirm.
kmeans = create_model('kmeans')
kmeans_results = assign_model(kmeans)

示例#14

0

显示文件

文件： pycaret.py 项目： Godcomplex11/DU

#Topic: Py Caret
#-----------------------------
#libraries
#https://pycaret.org/
#PyCaret is an open source, low-code machine learning library in Python that allows you to go from preparing your data to deploying your model within minutes in your choice of notebook environment.
# Installation commands: run these in a terminal, not in Python. As bare statements they
# are a SyntaxError and stop the whole script from running.
# (The second package name was misspelled "SQLA1chemy" with a digit one.)
#   pip install pycaret
#   pip install SQLAlchemy
import pycaret
import pandas as pd

# Loading data from pycaret
#datasets - https://pycaret.org/get-data/


from pycaret.datasets import get_data
data = get_data('juice')


#env for modeling
#https://pycaret.org/setup/
#Classification	from pycaret.classification import *
#Regression	from pycaret.regression import *
#Clustering	from pycaret.clustering import *
#Anomaly Detection	from pycaret.anomaly import *
#Natural Language Processing	from pycaret.nlp import *
#Association Rule Mining	from pycaret.arules import *

#classification
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# Importing module and initializing setup

示例#15

0

显示文件

文件： Regression_model.py 项目： PROFESSORRQ/pycaret

# Check the PyCaret version.
from pycaret.utils import version
version()


# In[3]:


from pycaret.datasets import get_data


# In[4]:


# Internet connection is required
# 'index' is requested here.
# NOTE(review): presumably this returns the catalogue of available datasets - confirm.
dataSets = get_data('index')
dataSets


# In[5]:


# Internet connection is required
bike_df = get_data("bike")
# This is a regression dataset with a continuous target.
# NOTE(review): the original comment named the column 'medv', which looks copied from a
# Boston-housing example - confirm the actual target column of the 'bike' dataset.


# In[6]:


print(bike_df.shape)

示例#16

0

显示文件

import pandas as pd

# Option 1: read your own CSV file (placeholder path).
data = pd.read_csv('c:/path_to_data/file.csv')

from pycaret.datasets import get_data

# Option 2: load the 'juice' dataset via PyCaret. This rebinds `data`,
# so the frame read from the CSV above is discarded.
data = get_data('juice')

示例#17

0

显示文件

文件： app.py 项目： lindavik/streamlit-tinkering

def start():
    """Load the 'diabetes' dataset, print it, and compare models for 'Class variable'.

    Expects ``get_data``, ``setup`` and ``compare_models`` to be in scope at
    module level. Returns ``None``.
    """
    frame = get_data('diabetes')
    print(frame)
    # setup() must run before compare_models(); its return value is not needed here.
    setup(frame, target='Class variable')
    compare_models()

示例#18

0

显示文件


## My ARGS

# Pycaret Args
_input_file = None  # path of a CSV file to load when no demo dataset applies
_demo_dataset = 'boston'  # name of the PyCaret demo dataset to load
_pandas_profiling = True  # forwarded to get_data(profile=...) for the demo datasets
_target = 'medv'  # default target column for the Boston data
_silent_preproccessing = True
import logging
# import the dataset from pycaret repository
import pandas as pd

# Decide which demo dataset (if any) to load. Precedence is unchanged:
#   1. nothing configured at all        -> fall back to 'boston'
#   2. a supported demo dataset is set  -> use it (even if _input_file is also set)
#   3. anything else                    -> read _input_file as CSV
if _input_file is None and _demo_dataset is None:
    _dataset_name = 'boston'
elif _demo_dataset in ['diabetes', 'boston']:
    _dataset_name = _demo_dataset
else:
    _dataset_name = None

if _dataset_name is not None:
    from pycaret.datasets import get_data
    input_data = get_data(_dataset_name, profile=_pandas_profiling)

    # Hold out 10% of the rows as unseen data. The sampled rows are dropped by their
    # ORIGINAL index labels; resetting the index before drop() (as the previous version
    # did) removes the first N rows instead of the sampled ones, so the sets would overlap.
    _sampled = input_data.sample(frac=0.9, random_state=786)
    data = _sampled.reset_index(drop=True)
    data_unseen = input_data.drop(_sampled.index).reset_index(drop=True)

    print('Data for Modeling: ' + str(data.shape))
    print('Unseen Data For Predictions: ' + str(data_unseen.shape))
else:
    # As before, no modeling/unseen split is made for a user-supplied file.
    input_data = pd.read_csv(_input_file)

示例#19

0

显示文件

文件： trained_model.py 项目： Remxy/Bill_Prediction

#LOAD DATA
# NOTE(review): confirm that 'datasets_insurance' is a dataset name get_data() accepts.
from pycaret.datasets import get_data
data = get_data('datasets_insurance')
data

#SHOW EXPERIMENT 1
from pycaret.regression import *
s = setup(data, target = 'charges', session_id = 123)

#CREATE LINEAR REGRESSION MODEL
# (the original comment said "logistic", but this is a pycaret.regression experiment - 'lr' presumably means linear regression here; confirm)
lr = create_model('lr')

#PLOT LR MODEL
plot_model(lr)

#EXPLORE EXPERIMENT 2
# Same data and target, with extra preprocessing options switched on.
s2 = setup(data, target = 'charges', session_id = 123,
           normalize = True,
           polynomial_features = True, trigonometry_features = True, feature_interaction=True, 
           bin_numeric_features= ['age', 'bmi'])

#SHOW COLUMNS IN EXPERIMENT 2
# Reads .columns from the first element of the value returned by setup().
s2[0].columns

#CREATE EXPERIMENT 2 MODEL
# Rebinds `lr`; the model from experiment 1 is no longer referenced.
lr = create_model('lr')

#PLOT THE MODEL
plot_model(lr)

#save the model

示例#20

0

显示文件

文件： ML with PyCaret.py 项目： kurtholst/databricks_proj

# Databricks notebook source


# COMMAND ----------

from pycaret.classification import *

# COMMAND ----------

# Load the 'credit' dataset.
from pycaret.datasets import get_data
dataset = get_data('credit')

# COMMAND ----------

# Withhold 5% of the rows as unseen data. The sampled rows are dropped by their ORIGINAL
# index labels; resetting the index before calling drop() (as the previous version did)
# removes the first N rows instead of the sampled ones, so the two sets would overlap.
sampled = dataset.sample(frac=0.95, random_state=786)
data = sampled.reset_index(drop=True)
data_unseen = dataset.drop(sampled.index).reset_index(drop=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

# COMMAND ----------

# Initialise the classification experiment with 'default' as the target.
exp_clf101 = setup(data = data, target = 'default', session_id=123)

# COMMAND ----------

# Create the model identified by 'rf'.
rf = create_model('rf')

# COMMAND ----------

# Tune the model created above.
tuned_rf = tune_model(rf)

示例#21

0

显示文件

文件： training_model.py 项目： gunjany/Machine_learning

# Making and training the Linear Regression model on the dataset = 'insurance'
from pycaret.datasets import get_data

dataset = get_data('insurance')

from pycaret.regression import *
# Experiment 1 using simple model creation without any feature scaling
# (kept commented out; only experiment 2 below actually runs)
# s1 = setup(dataset, target = 'charges', session_id = 123)

# lr = create_model('lr')
# plot_model(lr)

# Experiment 2 adding some additional parameters
s2 = setup(dataset, target = 'charges', session_id = 123,
          normalize = True,
          polynomial_features = True,
          trigonometry_features = True,
          feature_interaction = True,
          bin_numeric_features = ['age', 'bmi'])

lr = create_model('lr')

plot_model(lr)
# Save the model under the name 'deployment_30052020'.
save_model(lr, 'deployment_30052020')

# Example request against the deployed prediction API (kept commented out).
# NOTE(review): the sample payload sets 'smoker' to 'male', which looks like a copy/paste slip - confirm.
# import requests
# url = 'https://pycaret-insurance.herokuapp.com/predict_api'
# pred = requests.post(url,json={'age':55, 'sex':'male', 'bmi':59, 'children':1, 'smoker':'male', 'region':'northwest'})
# print(pred.json())

示例#22

0

显示文件

文件： PYCARET- USING PYCARET TO BUILD A PREDICTIVE MODEL WITH THE BOSTON DATA.py 项目： faruq2021/My-ML-MODEL-NOTEBOOKS

# coding: utf-8

# BELOW IS A PREDICTIVE MODEL USING PYCARET AND BOSTON HOUSING DATA

# In[1]:

import pycaret
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pycaret.datasets import get_data

# In[2]:

# Load the 'boston' dataset.
df = get_data('boston')

# **Data Preprocessing
#
# This data is already cleaned from Kaggle.
#

# In[3]:

# Bare expression: only displays the shape in an interactive cell.
df.shape

# In[4]:

# Count missing values per column.
df.isnull().sum()

# The original note here was cut off ("the above shows that the datasets are complete and no");
# it presumably meant that the dataset is complete and has no missing values.

示例#23

0

显示文件

文件： pycaret_binary_classification.py 项目： Rakib091998/Auto_ML

# -*- coding: utf-8 -*-
"""Pycaret_Binary_Classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Gsukmin5VPKFwpVav7wix6JQKb9nUAdf
"""

# Installing pycaret
# The notebook ran the shell escape below. It is not valid Python in a plain .py file,
# so it is kept as a comment; install the package from a shell before running this script.
# !pip install pycaret

# Getting the Data
from pycaret.datasets import get_data
dataset = get_data('credit')

#check the shape of data (only displays something interactively)
dataset.shape

# 5% of the records (1200 according to the original note) are withheld from the original
# dataset to be used for predictions. The sampled rows are dropped by their ORIGINAL index
# labels; resetting the index before calling drop() (as the previous version did) removes
# the first N rows instead of the sampled ones, so the two sets would overlap.
sampled = dataset.sample(frac=0.95, random_state=786)
data = sampled.reset_index(drop=True)
data_unseen = dataset.drop(sampled.index).reset_index(drop=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

"""# Setting up Environment in PyCaret"""

from pycaret.classification import *

exp_clf101 = setup(data = data, target = 'default', session_id=123)

示例#24

0

显示文件

文件： pycaret_example.py 项目： christian-oleary/emmv

'''
Author: Christian O'Leary
Email: [email protected]
'''

from emmv import emmv_scores

# Adapted from https://pycaret.org/setup/
# Importing dataset
from pycaret.datasets import get_data
anomalies = get_data('anomaly')

# Importing module and initializing setup
from pycaret.anomaly import *
anomaly_setup = setup(data=anomalies)

# create a model
model = create_model('iforest')
# `results` is not used again in this snippet.
results = assign_model(model)

# Get EM & MV scores
# emmv_scores() is read below through its 'em' and 'mv' keys.
test_scores = emmv_scores(model, anomalies)
# NOTE(review): this label ends with ';' while the next one uses ':' - probably a typo in the output text.
print('Excess Mass score;', test_scores['em'])
print('Mass Volume score:', test_scores['mv'])

示例#25

0

显示文件

#dataset and target
dataset = 'juice'
target = 'Purchase'

#checking version
from pycaret.utils import version
version()

import time
t0 = time.time()  # start timestamp; not read in the visible part of the script

#loading dataset
from pycaret.datasets import get_data
data = get_data(dataset, verbose=False)

#init classification (setup is imported from pycaret.classification, not regression)
from pycaret.classification import setup
exp1 = setup(data, target=target, silent=True, html=False, verbose=False)

#RECIPE #1 - SELECT TOP 5 MODELS
# Only the model ids in the whitelist are passed on to compare_models().
from pycaret.classification import compare_models
top5 = compare_models(n_select=5,
                      whitelist=['dt', 'lr', 'rf', 'lightgbm', 'xgboost'])

#RECIPE #2 - TUNE TOP 5 MODELS
from pycaret.classification import tune_model
tuned_top5 = [tune_model(i) for i in top5]
print(len(tuned_top5))

#RECIPE #3
from pycaret.classification import blend_models