示例#1
0
    def setUp(self):
        """Prepare random data and the step/connection description of a graph.

        Stores on the test instance:

        * ``size`` -- number of samples (100).
        * ``X`` -- ``(size, 1)`` array drawn with ``np.random.rand``.
        * ``y`` -- ``X`` plus ``np.random.randn`` noise of the same shape.
        * ``steps`` -- list of ``(name, estimator)`` pairs.
        * ``connections`` -- mapping from step name to the inputs it receives;
          a plain string names an external variable, a ``(step, variable)``
          tuple names the output of another step.
        """
        n_samples = 100
        self.size = n_samples
        self.X = np.random.rand(n_samples, 1)
        self.y = self.X + np.random.randn(n_samples, 1)

        # Estimators are created in list order, one fresh instance per step.
        self.steps = [
            ('Concatenate_Xy', Concatenator()),
            ('Gaussian_Mixture', GaussianMixture(n_components=3)),
            ('Dbscan', DBSCAN(eps=0.5)),
            ('Combine_Clustering', CustomCombination()),
            ('Regressor', LinearRegression()),
        ]

        # Both clustering steps consume the same concatenated output.
        concatenated_xy = ('Concatenate_Xy', 'Xy')
        self.connections = {
            'Concatenate_Xy': {'df1': 'X', 'df2': 'y'},
            'Gaussian_Mixture': {'X': concatenated_xy},
            'Dbscan': {'X': concatenated_xy},
            'Combine_Clustering': {
                'dominant': ('Dbscan', 'predict'),
                'other': ('Gaussian_Mixture', 'predict'),
            },
            'Regressor': {'X': 'X', 'y': 'y'},
        }
示例#2
0
    def setUp(self):
        """Create random frames, two graph descriptions and a fitted PipeGraph.

        Stores on the test instance:

        * ``size``, ``X``, ``y`` -- 1000 samples held in single-column
          DataFrames (columns ``'X'`` and ``'y'``) drawn with
          ``np.random.rand``.
        * ``steps`` / ``connections`` -- the regular graph description.
        * ``steps_external`` / ``connections_external`` -- the same estimator
          instances, but with the first step named ``'_External'``.
        * ``pgraph`` -- a ``PipeGraph`` built from the regular description and
          already fitted on ``X`` and ``y``.
        """
        self.size = 1000
        self.X = pd.DataFrame({'X': np.random.rand(self.size, )})
        self.y = pd.DataFrame({'y': np.random.rand(self.size, )})

        # The very same estimator objects back both step lists.
        estimators = [
            Concatenator(),
            GaussianMixture(n_components=3),
            DBSCAN(eps=0.5),
            CustomCombination(),
            LinearRegression(),
        ]
        step_names = [
            'Concatenate_Xy',
            'Gaussian_Mixture',
            'Dbscan',
            'Combine_Clustering',
            'Regressor',
        ]

        def build_connections(first_step_name):
            # Returns a fresh mapping on every call so the two descriptions
            # never share mutable state.
            # NOTE(review): the clustering inputs always refer to
            # 'Concatenate_Xy', even when the first step is '_External';
            # presumably intentional for a negative test -- confirm.
            return {
                first_step_name: {'df1': 'X', 'df2': 'y'},
                'Gaussian_Mixture': {'X': ('Concatenate_Xy', 'predict')},
                'Dbscan': {'X': ('Concatenate_Xy', 'predict')},
                'Combine_Clustering': {
                    'dominant': ('Dbscan', 'predict'),
                    'other': ('Gaussian_Mixture', 'predict'),
                },
                'Regressor': {'X': 'X', 'y': 'y'},
            }

        self.steps = list(zip(step_names, estimators))
        self.connections = build_connections('Concatenate_Xy')

        self.steps_external = list(
            zip(['_External'] + step_names[1:], estimators))
        self.connections_external = build_connections('_External')

        self.pgraph = PipeGraph(steps=self.steps,
                                fit_connections=self.connections)
        self.pgraph.fit(self.X, self.y)
示例#3
0
    def setUp(self):
        """Create random frames and an unfitted PipeGraph that includes Paella.

        Stores on the test instance:

        * ``size``, ``X``, ``y`` -- 100 samples held in single-column
          DataFrames (columns ``'X'`` and ``'y'``) drawn with
          ``np.random.rand``.
        * ``steps`` / ``connections`` -- the graph description; the
          ``'Paella'`` step receives the combined clustering as
          ``classification`` and its ``predict`` output is routed to the
          regressor as ``sample_weight``.
        * ``pgraph`` -- a ``PipeGraph`` built from that description (it is not
          fitted here).
        """
        self.size = 100
        self.X = pd.DataFrame({'X': np.random.rand(self.size, )})
        self.y = pd.DataFrame({'y': np.random.rand(self.size, )})

        paella_settings = {
            'regressor': LinearRegression,  # the class itself, not an instance
            'noise_label': None,
            'max_it': 10,
            'regular_size': 100,
            'minimum_size': 30,
            'width_r': 0.95,
            'power': 10,
            'random_state': 42,
        }

        # Estimators are created in list order, one fresh instance per step.
        self.steps = [
            ('Concatenate_Xy', Concatenator()),
            ('Gaussian_Mixture', GaussianMixture(n_components=3)),
            ('Dbscan', DBSCAN(eps=0.5)),
            ('Combine_Clustering', CustomCombination()),
            ('Paella', Paella(**paella_settings)),
            ('Regressor', LinearRegression()),
        ]

        self.connections = {
            'Concatenate_Xy': {'df1': 'X', 'df2': 'y'},
            'Gaussian_Mixture': {'X': ('Concatenate_Xy', 'predict')},
            'Dbscan': {'X': ('Concatenate_Xy', 'predict')},
            'Combine_Clustering': {
                'dominant': ('Dbscan', 'predict'),
                'other': ('Gaussian_Mixture', 'predict'),
            },
            'Paella': {
                'X': 'X',
                'y': 'y',
                'classification': ('Combine_Clustering', 'predict'),
            },
            'Regressor': {
                'X': 'X',
                'y': 'y',
                'sample_weight': ('Paella', 'predict'),
            },
        }

        self.pgraph = PipeGraph(steps=self.steps,
                                fit_connections=self.connections)
示例#4
0
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
# MinMaxScaler is instantiated below; without this import the script stops
# with a NameError at the ``scaler = MinMaxScaler()`` line.
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

from pipegraph.base import PipeGraph, Concatenator

# Feature matrix and class labels of the iris data set.
iris = load_iris()
X = iris.data
y = iris.target

# One estimator instance per node of the graph.
scaler = MinMaxScaler()
gaussian_nb = GaussianNB()
svc = SVC()
mlp = MLPClassifier()
concatenator = Concatenator()

# (name, estimator) pairs; the names are the node labels used when wiring
# the graph.
steps = [
    ('scaler', scaler),
    ('gaussian_nb', gaussian_nb),
    ('svc', svc),
    ('concat', concatenator),
    ('mlp', mlp),
]

###############################################################################
# In this example the graph is built with :class:`PipeGraph`; the last step
# listed, ``mlp``, is a classifier.
# NOTE(review): this text previously named :class:`PipeGraphClassifier`, chosen
# to take advantage of Scikit-Learn's default scoring method for classifiers;
# confirm that :class:`PipeGraph` provides the same scoring here.

pgraph = PipeGraph(steps=steps)
(pgraph.inject(sink='scaler', sink_var='X', source='_External',
               source_var='X').inject('gaussian_nb', 'X', 'scaler').inject(
                   'gaussian_nb', 'y',
                   source_var='y').inject('svc', 'X', 'scaler').inject(
                       'svc', 'y',
                       source_var='y').inject('concat', 'X1', 'scaler').inject(
                           'concat', 'X2',