Exemplo n.º 1
0
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from ionyx import Blender
from ionyx.datasets import DataSetLoader

print('Beginning blender test...')

# Load the Otto Group data; the first returned value is not needed here.
_, X, y = DataSetLoader.load_otto_group()
# Work on a shuffled sample of 10000 rows and only the first 30 columns.
X, y = shuffle(X, y, n_samples=10000)
X = X[:, :30]

# (name, estimator class) pairs; each class is instantiated with defaults,
# in this order, to form the list handed to the Blender.
estimator_classes = (
    ('logistic', LogisticRegression),
    ('svc', SVC),
    ('decision_tree', DecisionTreeClassifier),
)
models = [(label, estimator_cls()) for label, estimator_cls in estimator_classes]

cv = KFold()
blender = Blender(models=models, scoring_metric='neg_mean_squared_error')
blender.build_ensemble(X, y, cv, retrain=True)
print(blender.ensemble_)

# Stacking variant: reload the Otto Group data and take the same kind of
# 10000-row shuffled sample restricted to the first 30 columns.
_, X, y = DataSetLoader.load_otto_group()
X, y = shuffle(X, y, n_samples=10000)
X = X[:, :30]

# Three base estimators followed by one stacker, all with default settings.
base_models = [
    ('logistic', LogisticRegression()),
    ('svc', SVC()),
    ('decision_tree', DecisionTreeClassifier()),
]
models = base_models + [('stacker', LogisticRegression())]

# One mask entry per model: 0 for every base model, 1 for the stacker.
layer_mask = [0] * len(base_models) + [1]

cv = KFold()
layer_cv = KFold()
blender = Blender(models=models, scoring_metric='neg_mean_squared_error')
blender.build_stacking_ensemble(X, y, cv, layer_cv, layer_mask, retrain=True)
print(blender.ensemble_)
Exemplo n.º 2
0
# Property-inspection example: only the first value returned by the loader
# is used to build the Visualizer.
data, _, _ = DataSetLoader.load_property_inspection()
viz = Visualizer(data)
viz.feature_distributions()
viz.feature_correlations()

# (plotted variables, category_vars) combinations, exercised in order.
relationship_cases = (
    (['T1_V1', 'T1_V2'], None),
    (['T1_V1', 'T1_V2', 'T2_V1'], None),
    (['T2_V1'], ['T1_V4']),
    (['T2_V1'], ['T1_V4', 'T1_V7']),
    (['T1_V1', 'T2_V1'], ['T1_V7']),
)
for plot_vars, categories in relationship_cases:
    viz.variable_relationship(plot_vars, category_vars=categories)

# Bike-sharing example: the loader's result is unpacked into four values
# (unlike the three-value loaders used elsewhere in this script) and only
# the first one is kept.
data, _, _, _ = DataSetLoader.load_bike_sharing()
viz = Visualizer(data)
# NOTE(review): presumably plots features along the data's sequence/time
# ordering -- confirm against the Visualizer implementation.
viz.sequential_relationships()

# Transform example on the Otto Group data: the first column is passed as
# the y column and every remaining column as the X columns.
data, _, _ = DataSetLoader.load_otto_group()
all_columns = data.columns
X_cols = all_columns[1:].tolist()
y_col = all_columns[0]
viz = Visualizer(data)
pca = PCA()

# The same PCA instance is used three times: once without a y column, then
# with the y column under each of the two task settings.
transform_variants = (
    {},
    {'y_column': y_col, 'task': 'classification'},
    {'y_column': y_col, 'task': 'regression'},
)
for extra_kwargs in transform_variants:
    viz.transform(pca, X_columns=X_cols, **extra_kwargs)

data, _, _ = DataSetLoader.load_otto_group()
data = data.iloc[:10000, :30]
X_cols = data.columns[1:].tolist()
y_col = data.columns[0]
viz = Visualizer(data)
viz.feature_importance(X_columns=X_cols,
                       y_column=y_col,