def __init__(self):
    '''You may define the structure of your model here. You can create your
    own type of ensemble: ensembles of pipelines or pipelines of ensembles.
    This example votes among two classifiers: BasicClassifier and a pipeline
    whose classifier is itself an ensemble of GaussianNB classifiers.'''
    fancy_classifier = Pipeline([
        ('preprocessing', Preprocessor()),
        ('classification', BaggingClassifier(base_estimator=GaussianNB()))])
    self.clf = VotingClassifier(
        estimators=[('basic', BasicClassifier()),
                    ('fancy', fancy_classifier)],
        voting='soft')
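# A minimal sketch of how such a voting ensemble is trained and queried.
# BasicClassifier and Preprocessor are course-provided, so stock scikit-learn
# parts (DecisionTreeClassifier, StandardScaler) stand in for them here; this
# is an illustration under those assumptions, not the course's own code.
from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
fancy = Pipeline([('preprocessing', StandardScaler()),
                  ('classification', BaggingClassifier(GaussianNB()))])
clf = VotingClassifier(estimators=[('basic', DecisionTreeClassifier()),
                                   ('fancy', fancy)],
                       voting='soft')
clf.fit(X, y)
print(clf.predict_proba(X[:3]))  # soft voting averages class probabilities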
def test(D):
    '''Function to try some example classifiers.'''
    classifier_dict = {
        '1. MonsterClassifier': MonsterClassifier(),
        '2. SimplePipeline': Pipeline([('prepro', Preprocessor()),
                                       ('classif', BasicClassifier())]),
        '3. RandomPred': RandomPredictor(),
        '4. Linear Discriminant Analysis': LinearDiscriminantAnalysis()}
    for key in sorted(classifier_dict):  # sorted so "last" is well defined
        myclassifier = classifier_dict[key]
        acc = D.ClfScatter(myclassifier)
    return acc  # Return the last accuracy (important to get the correct answer in the TP)
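# ClfScatter is a course-provided helper; as a sketch of roughly what it
# stands in for, here is a plain cross-validated accuracy loop built from
# scikit-learn only (classifier_dict as above; X, y are the training arrays).
# test_cv and its signature are illustrative, not part of the starting kit.
from sklearn.model_selection import cross_val_score

def test_cv(classifier_dict, X, y):
    acc = None
    for key in sorted(classifier_dict):
        scores = cross_val_score(classifier_dict[key], X, y, cv=5,
                                 scoring='accuracy')
        acc = scores.mean()
        print('%s: %.3f +- %.3f' % (key, acc, scores.std()))
    return acc  # last accuracy, mirroring test(D)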
def __init__(self):
    '''You may define the structure of your model here. You can create your
    own type of ensemble: ensembles of pipelines or pipelines of ensembles.
    This example votes (softly) among four classifiers; the last one is a
    pipeline whose classifier is itself an ensemble of GaussianNB
    classifiers.'''
    fancy_classifier = Pipeline([
        ('preprocessing', Preprocessor()),
        ('classification', BaggingClassifier(base_estimator=GaussianNB(),
                                             random_state=1))])
    self.clf = VotingClassifier(
        estimators=[('Linear Discriminant Analysis', LinearDiscriminantAnalysis()),
                    ('Gaussian Classifier', GaussianNB()),
                    ('Support Vector Machine', SVC(probability=True)),
                    ('Fancy Classifier', fancy_classifier)],
        voting='soft')
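# Sketch: soft voting averages the class probabilities of all members, so
# every member must implement predict_proba; that is why SVC is constructed
# with probability=True above. Synthetic data keeps this self-contained.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

X, y = make_classification(n_samples=200, random_state=1)
members = [('lda', LinearDiscriminantAnalysis()),
           ('gnb', GaussianNB()),
           ('svm', SVC(probability=True, random_state=1))]
clf = VotingClassifier(estimators=members, voting='soft').fit(X, y)

# With no weights, the ensemble probability is the mean of the members':
probas = np.mean([m.predict_proba(X[:5]) for m in clf.estimators_], axis=0)
assert np.allclose(probas, clf.predict_proba(X[:5]))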
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from DataManager import DataManager
from zPreprocessor import Preprocessor

input_dir = "../public_data"
output_dir = "../res"

basename = 'movierec'
D = DataManager(basename, input_dir)  # Load data
print("*** Original data ***")
print(D)

Prepro = Preprocessor()

# Preprocess the data and load it back into D
D.data['X_train'] = Prepro.fit_transform(D.data['X_train'], D.data['Y_train'])
D.data['X_valid'] = Prepro.transform(D.data['X_valid'])
D.data['X_test'] = Prepro.transform(D.data['X_test'])

# Show something that proves the preprocessing worked
print("*** Transformed data ***")
print(D)

# Preprocessing gives you opportunities for visualization:
# scatter plots of the first two principal components, or of the
# pairs of features that are most relevant.
import matplotlib.pyplot as plt
X = D.data['X_train']
Y = D.data['Y_train']
plt.scatter(X[:, 0], X[:, 1], c=Y)
plt.show()
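# Continuing the script above (X and Y already loaded), a sketch of the
# principal-component scatter plot mentioned in the comments, using
# scikit-learn's PCA rather than any course-provided helper:
from sklearn.decomposition import PCA

X2 = PCA(n_components=2).fit_transform(X)
plt.scatter(X2[:, 0], X2[:, 1], c=Y)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.title('Training data in the first two principal components')
plt.show()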
def __init__(self):
    regressor = Pipeline([
        ('preprocessing', Preprocessor()),
        ('regression', linear_model.LinearRegression())])
    self.clf = regressor
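# A minimal sketch of how such a regression pipeline could be evaluated.
# Preprocessor is course-provided, so StandardScaler stands in for it, and
# synthetic data keeps the example self-contained.
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=0)
reg = Pipeline([('preprocessing', StandardScaler()),  # stand-in for Preprocessor
                ('regression', LinearRegression())])
scores = cross_val_score(reg, X, y, cv=5, scoring='r2')
print('R^2: %.3f +- %.3f' % (scores.mean(), scores.std()))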
from sklearn.metrics import accuracy_score
# Interesting point: the M2 students sometimes prepared their challenges with
# AutoML challenge metrics rather than scikit-learn metrics. For example:
from libscores import bac_metric
from libscores import auc_metric

from zDataManager import DataManager  # The class provided by group 1

basename = 'Iris'
D = DataManager(basename, input_dir)  # Load data
print(D)

# Here we define 3 classifiers and compare them
classifier_dict = {
    'Pipeline': Pipeline([('prepro', Preprocessor()),
                          ('classif', BasicClassifier())]),
    'Classifier': Classifier(),
    'Random': RandomPredictor()}

print("Classifier\tAUC\tBAC\tACC\tError bar")
for key in classifier_dict:
    myclassifier = classifier_dict[key]
    # Train
    Ytrue_tr = D.data['Y_train']
    myclassifier.fit(D.data['X_train'], Ytrue_tr)
    # Some classifiers and cost functions use a different encoding of the target
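    # A sketch of how this loop might continue. Hedged assumptions: the
    # AutoML-starting-kit libscores metrics take (solution, prediction)
    # arrays with one-hot-encoded multi-class targets, and may need a
    # `task` argument depending on the libscores version; sklearn's
    # accuracy_score works on plain labels.
    Ypred_tr = myclassifier.predict(D.data['X_train'])
    acc = accuracy_score(Ytrue_tr, Ypred_tr)
    from sklearn.preprocessing import label_binarize
    classes = sorted(set(Ytrue_tr))
    solution = label_binarize(Ytrue_tr, classes=classes)
    prediction = label_binarize(Ypred_tr, classes=classes)
    auc = auc_metric(solution, prediction)
    bac = bac_metric(solution, prediction)
    print("%s\t%.3f\t%.3f\t%.3f" % (key, auc, bac, acc))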