def setUp(self):
    """Prepare noisy sample data and the step/connection fixtures.

    Sets ``self.size``, ``self.X``, ``self.y``, ``self.steps`` and
    ``self.connections``. No graph object is created in this fixture.
    """
    n_samples = 100
    self.size = n_samples

    # Keep the draw order (uniform X first, then Gaussian noise) so results
    # stay the same under a seeded global RNG.
    self.X = np.random.rand(n_samples, 1)
    noise = np.random.randn(n_samples, 1)
    self.y = self.X + noise

    # (name, estimator) pairs; estimators are instantiated in list order.
    self.steps = [
        ('Concatenate_Xy', Concatenator()),
        ('Gaussian_Mixture', GaussianMixture(n_components=3)),
        ('Dbscan', DBSCAN(eps=0.5)),
        ('Combine_Clustering', CustomCombination()),
        ('Regressor', LinearRegression()),
    ]

    # Both clustering steps read the same output of the concatenation step.
    concatenated = ('Concatenate_Xy', 'Xy')
    self.connections = {
        'Concatenate_Xy': {'df1': 'X', 'df2': 'y'},
        'Gaussian_Mixture': {'X': concatenated},
        'Dbscan': {'X': concatenated},
        'Combine_Clustering': {
            'dominant': ('Dbscan', 'predict'),
            'other': ('Gaussian_Mixture', 'predict'),
        },
        'Regressor': {'X': 'X', 'y': 'y'},
    }
def setUp(self):
    """Build the sample frames, two step/connection variants and a fitted graph.

    Sets ``self.size``, ``self.X``, ``self.y``, ``self.steps``,
    ``self.connections``, ``self.steps_external``,
    ``self.connections_external`` and ``self.pgraph``. The graph is built
    from the regular variant and fitted on ``self.X`` / ``self.y``.
    """
    n_samples = 1000
    self.size = n_samples
    # X is drawn before y; both are single-column DataFrames.
    self.X = pd.DataFrame({'X': np.random.rand(n_samples)})
    self.y = pd.DataFrame({'y': np.random.rand(n_samples)})

    # One instance per estimator, shared by both step lists below.
    estimators = [
        Concatenator(),
        GaussianMixture(n_components=3),
        DBSCAN(eps=0.5),
        CustomCombination(),
        LinearRegression(),
    ]
    step_names = [
        'Concatenate_Xy',
        'Gaussian_Mixture',
        'Dbscan',
        'Combine_Clustering',
        'Regressor',
    ]

    self.steps = list(zip(step_names, estimators))
    self.connections = {
        'Concatenate_Xy': {'df1': 'X', 'df2': 'y'},
        'Gaussian_Mixture': {'X': ('Concatenate_Xy', 'predict')},
        'Dbscan': {'X': ('Concatenate_Xy', 'predict')},
        'Combine_Clustering': {
            'dominant': ('Dbscan', 'predict'),
            'other': ('Gaussian_Mixture', 'predict'),
        },
        'Regressor': {'X': 'X', 'y': 'y'},
    }

    # Variant whose first step is named '_External' instead.
    # NOTE(review): the clustering entries still point at 'Concatenate_Xy',
    # which is not a step name in this variant -- presumably deliberate
    # (negative-test data); confirm against the tests that consume it.
    self.steps_external = list(zip(['_External'] + step_names[1:], estimators))
    self.connections_external = {
        '_External': {'df1': 'X', 'df2': 'y'},
        'Gaussian_Mixture': {'X': ('Concatenate_Xy', 'predict')},
        'Dbscan': {'X': ('Concatenate_Xy', 'predict')},
        'Combine_Clustering': {
            'dominant': ('Dbscan', 'predict'),
            'other': ('Gaussian_Mixture', 'predict'),
        },
        'Regressor': {'X': 'X', 'y': 'y'},
    }

    # The graph receives the very same list/dict objects stored on self.
    self.pgraph = PipeGraph(steps=self.steps, fit_connections=self.connections)
    self.pgraph.fit(self.X, self.y)
def setUp(self):
    """Build the sample frames and an unfitted graph that includes Paella.

    Sets ``self.size``, ``self.X``, ``self.y``, ``self.steps``,
    ``self.connections`` and ``self.pgraph``. ``fit`` is not called here.
    """
    n_samples = 100
    self.size = n_samples
    # X is drawn before y; both are single-column DataFrames.
    self.X = pd.DataFrame({'X': np.random.rand(n_samples)})
    self.y = pd.DataFrame({'y': np.random.rand(n_samples)})

    # Paella receives the LinearRegression class itself, not an instance.
    paella_settings = {
        'regressor': LinearRegression,
        'noise_label': None,
        'max_it': 10,
        'regular_size': 100,
        'minimum_size': 30,
        'width_r': 0.95,
        'power': 10,
        'random_state': 42,
    }

    # (name, estimator) pairs; estimators are instantiated in list order.
    self.steps = [
        ('Concatenate_Xy', Concatenator()),
        ('Gaussian_Mixture', GaussianMixture(n_components=3)),
        ('Dbscan', DBSCAN(eps=0.5)),
        ('Combine_Clustering', CustomCombination()),
        ('Paella', Paella(**paella_settings)),
        ('Regressor', LinearRegression()),
    ]

    concatenated = ('Concatenate_Xy', 'predict')
    self.connections = {
        'Concatenate_Xy': {'df1': 'X', 'df2': 'y'},
        'Gaussian_Mixture': {'X': concatenated},
        'Dbscan': {'X': concatenated},
        'Combine_Clustering': {
            'dominant': ('Dbscan', 'predict'),
            'other': ('Gaussian_Mixture', 'predict'),
        },
        # Paella takes the combined clustering as its classification input.
        'Paella': {
            'X': 'X',
            'y': 'y',
            'classification': ('Combine_Clustering', 'predict'),
        },
        # The regressor's sample_weight comes from Paella's output.
        'Regressor': {
            'X': 'X',
            'y': 'y',
            'sample_weight': ('Paella', 'predict'),
        },
    }

    # The graph receives the very same list/dict objects stored on self.
    self.pgraph = PipeGraph(steps=self.steps, fit_connections=self.connections)
from pipegraph.base import PipeGraph, Concatenator import matplotlib.pyplot as plt from sklearn.datasets import load_iris from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC from sklearn.neural_network import MLPClassifier iris = load_iris() X = iris.data y = iris.target scaler = MinMaxScaler() gaussian_nb = GaussianNB() svc = SVC() mlp = MLPClassifier() concatenator = Concatenator() steps = [('scaler', scaler), ('gaussian_nb', gaussian_nb), ('svc', svc), ('concat', concatenator), ('mlp', mlp)] ############################################################################### # In this example we use a :class:`PipeGraphClassifier` because the result is a classification and we want to take advantage of Scikit-Learn default scoring method for classifiers. pgraph = PipeGraph(steps=steps) (pgraph.inject(sink='scaler', sink_var='X', source='_External', source_var='X').inject('gaussian_nb', 'X', 'scaler').inject( 'gaussian_nb', 'y', source_var='y').inject('svc', 'X', 'scaler').inject( 'svc', 'y', source_var='y').inject('concat', 'X1', 'scaler').inject( 'concat', 'X2',