Пример #1
0
def test_dirty_float_target_regression():
    """Check that a dirty-float target warns in ``clean`` and ``plot``.

    A frame with one numeric feature gets the output of
    ``make_dirty_float()`` as its ``target`` column. Both ``clean`` and
    ``plot`` are expected to emit the same ``UserWarning`` about discarding
    targets that cannot be converted to float. Finally ``plot`` is run on the
    titanic data to make sure a target that is not a dirty float still works.
    """
    # The same message is expected from both entry points, so spell it once.
    # Note that ``match`` is applied as a regular expression.
    expected_message = ("Discarding dirty_float targets that "
                        "cannot be converted to float.")

    frame = pd.DataFrame({'one': np.repeat(np.arange(50), 2)})
    frame['target'] = make_dirty_float()

    for entry_point in (clean, plot):
        with pytest.warns(UserWarning, match=expected_message):
            entry_point(frame, target_col="target")

    # check if works for non dirty_float targets
    plot(load_titanic(), 'survived')
Пример #2
0
"""
Adult Census Dataset Visualization
====================================
"""
# sphinx_gallery_thumbnail_number = 2
from dabl import plot
from dabl.datasets import load_adult
import matplotlib.pyplot as plt

# Load the adult census dataset.
# The loader returns a plain dataframe.
data = load_adult()

# Name the target column through the ``target_col`` keyword: the second
# positional parameter of ``plot`` is the target Series ``y``, so a column
# name belongs in ``target_col`` (as the other dabl calls do).
plot(data, target_col='income', scatter_alpha=.1)
plt.show()
Пример #3
0
"""
Diamonds Dataset Visualization
==========================================
Regression on the classical diamond dataset.
"""
# sphinx_gallery_thumbnail_number = 2
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from dabl import plot

# Fetch the 'diamonds' dataset from OpenML as a (features, target) pair of
# pandas objects.
X, y = fetch_openml(name='diamonds', return_X_y=True, as_frame=True)

# The target is handed over as a separate object here, so it goes into ``y``.
plot(X, y=y)
plt.show()
Пример #4
0
# In[ ]:

import dabl

# In[ ]:

# Keep only the selected columns.
# NOTE(review): `bdf` and `final_feature_list` are defined outside this
# excerpt - presumably a DataFrame and a list of its column names; confirm.
feature_df = bdf[final_feature_list]

# In[ ]:

# Let dabl clean the selected columns before plotting and modelling.
dabl_data = dabl.clean(feature_df)

# In[ ]:

# Plot the cleaned data against the 'save_within_48' target column.
dabl.plot(dabl_data, target_col='save_within_48')

# In[ ]:

# Separate the target column from the remaining feature columns.
Y = dabl_data['save_within_48']
X = dabl_data.drop(columns='save_within_48')

# In[ ]:

# Fit the preprocessor on the features and transform them in one step.
preprocessor = dabl.EasyPreprocessor()
X_trans = preprocessor.fit_transform(X)

# In[ ]:

# NOTE(review): the classifier is fitted on the already-transformed features;
# check whether SimpleClassifier preprocesses its input itself, in which case
# the explicit EasyPreprocessor step above may be redundant.
classifier = dabl.SimpleClassifier(random_state=0)
fc = classifier.fit(X_trans, Y)
Пример #5
0
"""
Ames Housing Dataset Visualization
====================================
"""
from dabl import plot
from dabl.datasets import load_ames
import matplotlib.pyplot as plt

# Load the Ames housing dataset.
# The loader returns a plain dataframe.
data = load_ames()

# Name the target column through the ``target_col`` keyword: the second
# positional parameter of ``plot`` is the target Series ``y``, so a column
# name belongs in ``target_col``.
plot(data, target_col='SalePrice')
plt.show()
# No. of unique items present in each categorical (object-dtype) column.
# NOTE: this is a bare expression - it only shows a result in an interactive
# session or notebook; in a plain script the value is discarded.

data.select_dtypes('object').nunique()

# Percentage of missing data in each column present in the data.
# The mean of the boolean null-mask is the share of missing rows per column
# (missing count divided by the number of rows); multiplying by 100 turns that
# share into the percentage the variable name promises.

percentage_of_missing_data = data.isnull().mean() * 100
print(percentage_of_missing_data)

# Comparison of all other attributes with respect to the Math, Reading and
# Writing marks, one dabl plot per score column. The figure size and style are
# re-applied before every plot, exactly as in the original per-plot setup.

for score_column in ('math score', 'reading score', 'writing score'):
    plt.rcParams['figure.figsize'] = (18, 6)
    plt.style.use('fivethirtyeight')
    dabl.plot(data, target_col=score_column)

# Inferential Statistics
Пример #7
0
    
    # print data(feature)shape
    wine.data.shape
    # print the wine data features (top 5 records)
    print (wine.data[0:5])
    print (wine.target)
    #Count number of observation in each class
    for i in set(wine.target):
        print('Class', i, ' -> ', list(wine.target).count(i))
        
explore()

# Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB as GNB

# Train the model using the training sets
# and predict the response for the test dataset.
# NOTE(review): X_train, y_train, X_test and y_test are defined outside this
# excerpt - presumably a train/test split of the wine data; confirm.
y_pred = GNB().fit(X_train, y_train).predict(X_test)

# Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# Model accuracy: how often is the classifier correct?
# The score is scaled by 100 and printed with three decimals as a percentage.
print(f"Accuracy: {100*metrics.accuracy_score(y_test, y_pred):.3f}%")

import matplotlib.pyplot as plt
from dabl import plot
from dabl.utils import data_df_from_bunch

# Convert the wine bunch to a dataframe and plot it against its 'target'
# column. The column name is passed through ``target_col``: the second
# positional parameter of ``plot`` is the target Series ``y``.
plot(data_df_from_bunch(wine), target_col='target')
plt.show()
Пример #8
0
"""
Wine Classification Dataset Visualization
==========================================
"""
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from dabl import plot
from dabl.utils import data_df_from_bunch

# Load the wine data and convert the returned bunch into a dataframe for dabl.
wine_bunch = load_wine()
wine_df = data_df_from_bunch(wine_bunch)

# Name the target column through the ``target_col`` keyword: the second
# positional parameter of ``plot`` is the target Series ``y``, so a column
# name belongs in ``target_col``.
plot(wine_df, target_col='target')
plt.show()