Example #1
    def test_regressor_modifications(self):
        regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1)
        kpcovr = self.model(mixing=0.5,
                            regressor=regressor,
                            kernel="rbf",
                            gamma=0.1)

        # KPCovR regressor matches the original
        self.assertTrue(
            regressor.get_params() == kpcovr.regressor.get_params())

        # KPCovR regressor updates its parameters
        # to match the original regressor
        regressor.set_params(gamma=0.2)
        self.assertTrue(
            regressor.get_params() == kpcovr.regressor.get_params())

        # Fitting regressor outside KPCovR fits the KPCovR regressor
        regressor.fit(self.X, self.Y)
        self.assertTrue(hasattr(kpcovr.regressor, "dual_coef_"))

        # Raise error during KPCovR fit since the regressor and KPCovR
        # kernel parameters are now inconsistent
        with self.assertRaises(ValueError) as cm:
            kpcovr.fit(self.X, self.Y)
        self.assertEqual(
            str(cm.exception),
            "Kernel parameter mismatch: the regressor has kernel parameters "
            "{kernel: 'rbf', gamma: 0.2, degree: 3, coef0: 1, kernel_params: None}"
            " and KernelPCovR was initialized with kernel parameters "
            "{kernel: 'rbf', gamma: 0.1, degree: 3, coef0: 1, kernel_params: None}",
        )
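
A minimal usage sketch of what the test above exercises: constructing a KernelPCovR around an external KernelRidge regressor whose kernel parameters match KPCovR's own, so that fitting succeeds. The import path assumes the skcosmo package layout, and the data is random placeholder input.

import numpy as np
from sklearn.kernel_ridge import KernelRidge
from skcosmo.decomposition import KernelPCovR

X = np.random.rand(20, 4)
Y = np.random.rand(20)

# Matching kernel parameters on both objects avoids the ValueError above
regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1)
kpcovr = KernelPCovR(mixing=0.5, regressor=regressor, kernel="rbf", gamma=0.1)
kpcovr.fit(X, Y)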
Example #2
import numpy as np
from sklearn.kernel_ridge import KernelRidge


def kernel_ridge_regression():
    # Fit a kernel ridge model (default linear kernel) on random data
    n_samples, n_features = 10, 5
    rng = np.random.RandomState(0)
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)
    clf = KernelRidge(alpha=1.0)
    clf.fit(X, y)
    print(clf.alpha)
    print(clf.get_params())
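
A possible follow-up inside the function (not in the original snippet): inspect the fitted model's predictions and its R^2 score on the training data.

    print(clf.predict(X))
    print(clf.score(X, y))  # R^2 on the training set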
Example #3
# `Classifier` is the project's own base class; these imports make the
# rest of the snippet runnable.
from sklearn.kernel_ridge import KernelRidge
from sklearn.preprocessing import OneHotEncoder


class KernelMethod(Classifier):
    def __init__(self, **kwargs):
        super().__init__()
        self._model = KernelRidge(**kwargs)
        self.hyperparams = self._model.get_params()
        self.enc = None

    def fit(self, X, Y):
        """
        Train the model on the provided data.

        Parameters
        ==========
        X: pandas DataFrame. Attribute values.
        Y: pandas Series.    Object labels.

        Returns
        =======
        None.
        """
        X = X.values
        Y = Y.values

        # Verify that the labels can be cast to floats; if not, one-hot
        # encode them so KernelRidge can regress onto them
        try:
            float(Y[0])
        except ValueError:
            self.enc = OneHotEncoder()
            Y = self.enc.fit_transform(Y.reshape(-1, 1)).toarray()

        self._model.fit(X=X, y=Y)

    def predict(self, X):
        """
        Return prediction labels for X.

        Parameters
        ==========
        X: pandas DataFrame -> data to predict values for.

        Returns
        =======
        Prediction labels: array-like of size (n_samples, [n_features]).
        """
        pred = self._model.predict(X)
        if self.enc is None:
            return pred
        # Recover a one-hot matrix by marking each row's maximum, then map
        # it back to the original labels
        return self.enc.inverse_transform(
            (pred == pred.max(axis=1, keepdims=True)).astype(float)).reshape(-1)
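
What the inverse_transform branch above does, in isolation: continuous regression outputs for one-hot-encoded labels are turned back into a one-hot matrix by marking each row's maximum, which OneHotEncoder.inverse_transform can then map to the original labels. A self-contained sketch:

import numpy as np

pred = np.array([[0.9, 0.2],
                 [0.1, 0.7]])
onehot = (pred == pred.max(axis=1, keepdims=True)).astype(float)
print(onehot)  # [[1. 0.]
               #  [0. 1.]]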
Example #4
    def fit(self, X, Y):
        """

        Fit the model with X and Y.

        Parameters
        ----------
        X:  ndarray, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples and
            n_features is the number of features.

            It is suggested that :math:`\\mathbf{X}` be centered by its column-
            means and scaled. If features are related, the matrix should be scaled
            to have unit variance, otherwise :math:`\\mathbf{X}` should be
            scaled so that each feature has a variance of 1 / n_features.

        Y:  ndarray, shape (n_samples, n_properties)
            Training data, where n_samples is the number of samples and
            n_properties is the number of properties.

            It is suggested that :math:`\\mathbf{Y}` be centered by its column-
            means and scaled. If features are related, the matrix should be scaled
            to have unit variance, otherwise :math:`\\mathbf{Y}` should be
            scaled so that each feature has a variance of 1 / n_features.

        Returns
        -------
        self: object
            Returns the instance itself.

        """

        if self.regressor is not None and not isinstance(
                self.regressor, KernelRidge):
            raise ValueError("Regressor must be an instance of `KernelRidge`")

        X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
        self.X_fit_ = X.copy()

        if self.n_components is None:
            if self.svd_solver != "arpack":
                self.n_components = X.shape[0]
            else:
                self.n_components = X.shape[0] - 1

        K = self._get_kernel(X)

        if self.center:
            self.centerer_ = KernelNormalizer()
            K = self.centerer_.fit_transform(K)

        self.n_samples_ = X.shape[0]

        if self.regressor is None:
            regressor = KernelRidge(
                kernel=self.kernel,
                gamma=self.gamma,
                degree=self.degree,
                coef0=self.coef0,
                kernel_params=self.kernel_params,
                **self.regressor_params,
            )
        else:
            regressor = self.regressor
            kernel_attrs = [
                "kernel", "gamma", "degree", "coef0", "kernel_params"
            ]
            if not all([
                    getattr(self, attr) == getattr(regressor, attr)
                    for attr in kernel_attrs
            ]):
                raise ValueError(
                    "Kernel parameter mismatch: the regressor has kernel parameters {%s}"
                    " and KernelPCovR was initialized with kernel parameters {%s}"
                    % (
                        ", ".join([
                            "%s: %r" % (attr, getattr(regressor, attr))
                            for attr in kernel_attrs
                        ]),
                        ", ".join([
                            "%s: %r" % (attr, getattr(self, attr))
                            for attr in kernel_attrs
                        ]),
                    ))

        # Check if regressor is fitted; if not, fit with precomputed K
        # to avoid needing to compute the kernel a second time
        self.regressor_ = check_krr_fit(regressor, K, X, Y)

        W = self.regressor_.dual_coef_.reshape(X.shape[0], -1)

        # Use this instead of `self.regressor_.predict(K)`
        # so that we can handle the case of the pre-fitted regressor
        Yhat = K @ W

        # When we have an unfitted regressor,
        # we fit it with a precomputed K
        # so we must subsequently "reset" it so that
        # it will work on the particular X
        # of the KPCovR call. The dual coefficients are kept.
        # Can be bypassed if the regressor is pre-fitted.
        try:
            check_is_fitted(regressor)

        except NotFittedError:
            self.regressor_.set_params(**regressor.get_params())
            self.regressor_.X_fit_ = self.X_fit_
            self.regressor_._check_n_features(self.X_fit_, reset=True)

        # Handle svd_solver
        self._fit_svd_solver = self.svd_solver
        if self._fit_svd_solver == "auto":
            # Small problem or self.n_components == 'mle', just call full PCA
            if max(X.shape) <= 500 or self.n_components == "mle":
                self._fit_svd_solver = "full"
            elif self.n_components >= 1 and self.n_components < 0.8 * min(
                    X.shape):
                self._fit_svd_solver = "randomized"
            # This is also the case of self.n_components in (0,1)
            else:
                self._fit_svd_solver = "full"

        self._fit(K, Yhat, W)

        self.ptk_ = self.pt__ @ K
        self.pty_ = self.pt__ @ Y

        if self.fit_inverse_transform:
            self.ptx_ = self.pt__ @ X

        self.pky_ = self.pkt_ @ self.pty_

        self.components_ = self.pkt_.T  # for sklearn compatibility
        return self
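
The docstring above recommends centering and scaling X and Y before fitting; a minimal sketch of that preprocessing with scikit-learn's StandardScaler (X and Y here are placeholder 2-D arrays):

from sklearn.preprocessing import StandardScaler

X_scaled = StandardScaler().fit_transform(X)
Y_scaled = StandardScaler().fit_transform(Y)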
Example #5
import glob
import math
import pickle

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.kernel_ridge import KernelRidge

import ReadAccident
import ReadSHP


def relationship_road_traffic_accidents():
    accidents = glob.glob('accident/*.csv')

    acc = 0
    # NOTE: `acc` is overwritten on each pass, so only the last accident
    # file is actually used below
    for a in accidents:
        acc = ReadAccident.Accident(a)

    toronto_traffic = pd.read_csv('traffic/traffic-vehicle.csv')

    # Relationship between peak vehicle volume and # of accidents at that intersection

    shp_files = glob.glob('shapefiles/*.shp')

    shp_data_objs = []

    for shp in shp_files:
        print(shp)
        shp_obj = ReadSHP.ReadSHPFile(shp, shp)
        shp_data_objs.append(shp_obj)

    data = acc.data

    intersec_id = {}
    other_xs = {}

    print('Running')

    for i in range(len(data)):
        long = data[i].long
        lat = data[i].lat
        fatal = data[i].fatal

        min_index = 0
        min_dist = math.sqrt(
            math.pow(long - toronto_traffic.loc[0, 'Longitude'], 2) +
            math.pow(lat - toronto_traffic.loc[0, 'Latitude'], 2))
        for j in range(1, len(toronto_traffic.index.values)):
            dist = math.sqrt(
                math.pow(long - toronto_traffic.loc[j, 'Longitude'], 2) +
                math.pow(lat - toronto_traffic.loc[j, 'Latitude'], 2))
            if dist < min_dist:
                min_dist = dist
                min_index = j

        if min_index not in intersec_id:
            intersec_id[min_index] = 1
        else:
            intersec_id[min_index] += 1

        if min_index not in other_xs:
            missing_xs = []
            for s in shp_data_objs:
                missing_xs.append(
                    s.binary_search(
                        toronto_traffic.loc[min_index, 'Longitude'],
                        toronto_traffic.loc[min_index, 'Latitude']))
            other_xs[min_index] = missing_xs

    xs = []
    ys = []
    for j in intersec_id:
        dt = [toronto_traffic.loc[j, '8 Peak Hr Vehicle Volume']]
        dt.extend(other_xs[j])
        xs.append(dt)
        ys.append(intersec_id[j])

    print(xs)
    xs = np.array(xs)
    ys = np.array(ys)

    # xs = sm.add_constant(xs)

    model = sm.OLS(ys, xs).fit()

    print(model.summary())
    print(model.params)

    clf = KernelRidge(alpha=1.0)
    clf.fit(xs, ys)

    # Persist the fitted model; the context manager ensures the file is closed
    with open('k_reg.pickle', 'wb') as file:
        pickle.dump(clf, file)
    print(clf.get_params())
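
The nearest-intersection search above is a quadratic pure-Python loop; a vectorized alternative with NumPy (assuming the same DataFrame columns) replaces the inner loop over rows with a single argmin:

def nearest_intersection(lon, lat, traffic_df):
    # (m, 2) array of intersection coordinates
    coords = traffic_df[['Longitude', 'Latitude']].to_numpy()
    d2 = (coords[:, 0] - lon) ** 2 + (coords[:, 1] - lat) ** 2
    return int(np.argmin(d2))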
Example #6
from readFile import readDataSet
from sklearn.kernel_ridge import KernelRidge
import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23

data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
X = data[:, 1:91]
y = data[:, 0]

clf = joblib.load('PCA_20k.pkl')
X = clf.transform(X)
print(X)
clf = KernelRidge(alpha=1, kernel="linear")
clf.fit(X, y)
print(clf.predict(X))
print(clf.get_params())
print(clf.score(X, y))

joblib.dump(clf, "KRR_linear_20k.pkl")

# from readFile import readDataSet
# from sklearn.kernel_ridge import KernelRidge
# from sklearn.externals import joblib

# data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
# X = data[:,1:91]
# y = data[:,0]

# clf = KernelRidge(alpha = 1e-3)
# clf.fit(X, y)
# joblib.dump(clf, "linear_KRR_20k.pkl")
Example #7
import time

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# The regressor names below (SupportVectorMachine, KernelRidge, ...) refer to
# estimator instances constructed earlier in the script, not to the classes.
SupportVectorMachine_pipe = Pipeline([('standardize', StandardScaler()), ('regressor', SupportVectorMachine)])
KernelRidge_pipe          = Pipeline([('standardize', StandardScaler()), ('regressor', KernelRidge)])
MultiLayerPerceptron_pipe = Pipeline([('standardize', StandardScaler()), ('regressor', MultiLayerPerceptron)])
KNeighbors_pipe           = Pipeline([('standardize', StandardScaler()), ('regressor', KNeighbors)])
ExtraTree_pipe            = Pipeline([('standardize', StandardScaler()), ('regressor', ExtraTree)])
DecisionTree_pipe         = Pipeline([('standardize', StandardScaler()), ('regressor', DecisionTree)])
RandomForest_pipe         = Pipeline([('standardize', StandardScaler()), ('regressor', RandomForest)])
GradientBoosting_pipe     = Pipeline([('standardize', StandardScaler()), ('regressor', GradientBoosting)])
HistGradientBoosting_pipe = Pipeline([('standardize', StandardScaler()), ('regressor', HistGradientBoosting)])


print()
print("Default Regressor Hyperparameter values:")
start = time.time()
print(KernelRidge.get_params())
end = time.time()
print("score with default values = ", getDefaultAccuracy(KernelRidge, KernelRidgeParameters, X, y))
print("Time Elapsed = ", end - start)

print()
start = time.time()
#get_GridSearchCV(KernelRidge, KernelRidgeParameters, X, y)
end = time.time()
print("Time Elapsed = ", end - start)

print()
start = time.time()
#get_RandomizedGridSearchCV(KernelRidge, KernelRidgeParameters, X, y)
end = time.time()
print("Time Elapsed = ", end - start)
Example #8
from math import sqrt

import pandas as pd
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# `df` is assumed to be the Auto MPG dataset, loaded in an earlier cell
factors = [
    'cylinders', 'displacement', 'horsepower', 'acceleration', 'weight',
    'origin'
]
X = pd.DataFrame(df[factors].copy())
y = df['mpg'].copy()
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=324)

# In[165]:

# `clf` comes from an earlier cell that is not shown; a plausible
# definition (an assumption, not from the original) would be:
clf = KernelRidge(alpha=1.0, kernel='rbf')
clf.get_params()

# In[166]:

clf.fit(X_train, y_train)

# In[167]:

y_predicted = clf.predict(X_test)

# In[168]:

# Root-mean-squared error on the held-out test set
rmse = sqrt(mean_squared_error(y_true=y_test, y_pred=y_predicted))
rmse
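
A possible extra check, not in the original notebook: cross-validated RMSE to guard against a lucky train/test split (uses scikit-learn's built-in scorer):

from sklearn.model_selection import cross_val_score

scores = cross_val_score(clf, X, y, scoring='neg_root_mean_squared_error', cv=5)
print(-scores.mean())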
