class GaussianProcessRegressorImpl():
    def __init__(self,
                 kernel=None,
                 alpha=1e-10,
                 optimizer='fmin_l_bfgs_b',
                 n_restarts_optimizer=0,
                 normalize_y=False,
                 copy_X_train=True,
                 random_state=None):
        self._hyperparams = {
            'kernel': kernel,
            'alpha': alpha,
            'optimizer': optimizer,
            'n_restarts_optimizer': n_restarts_optimizer,
            'normalize_y': normalize_y,
            'copy_X_train': copy_X_train,
            'random_state': random_state
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
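In this wrapper, SKLModel is presumably an alias for sklearn's GaussianProcessRegressor (the import is not shown in the excerpt). A minimal usage sketch under that assumption:

# Assumes: from sklearn.gaussian_process import GaussianProcessRegressor as SKLModel
import numpy as np
from sklearn.gaussian_process.kernels import RBF

X = np.linspace(0, 10, 50).reshape(-1, 1)
y = np.sin(X).ravel()

model = GaussianProcessRegressorImpl(kernel=RBF(length_scale=1.0), alpha=1e-6)
model.fit(X, y)           # delegates to the wrapped sklearn model
y_hat = model.predict(X)  # predictions from the wrapped model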
Example #2
def _fit_gp(X, y, kernel, max_gp):

    n_samples = X.shape[0]

    n_gp = min(n_samples, max_gp)

    X = X[(n_samples - n_gp):n_samples, :]
    y = y[(n_samples - n_gp):n_samples]

    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
    gp.fit(X, y)

    return gp
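_fit_gp keeps only the most recent max_gp rows of X and y before fitting, which bounds the cubic cost of exact GP training. A usage sketch (the kernel choice and the synthetic data are assumptions, not part of the excerpt):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

rng = np.random.default_rng(0)
X = rng.uniform(0, 10, size=(500, 1))
y = np.sin(X).ravel() + 0.1 * rng.standard_normal(500)

# Fit on at most the 200 most recent samples to keep O(n^3) training tractable.
gp = _fit_gp(X, y, kernel=RBF(1.0) + WhiteKernel(0.1), max_gp=200)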
Example #3
 def fit(self, X, y=None):
     self._sklearn_model = SKLModel(**self._hyperparams)
     if (y is not None):
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #4
File: gpr.py  Project: lucaskolstad/rBCM
def fit(kernel, sample_indices, X, y, n_restarts_optimizer, normalize_y):
    """Fits a Gaussian Process Regression model on a subset of X and y using
    the provided covariance kernel and subset indices. This is used as a single
    worker payload in the parallel fitting process of the rBCM.

    TODO: take the sample_indices argument out of this function and keep it
          in the logic of the rBCM class alone. Just pass the X and y we'll
          actually use. For now keep it to avoid too many changes during the
          refactor, however.

    Args:
        kernel : sklearn kernel object
            The kernel specifying the covariance function of the Gaussian
            Process.

        sample_indices : list of integers
            The indices of the subset of X and y to fit

        X : np.ndarray
            The locations of the points.
            Must match y in length.

        y : np.ndarray
            The values of the points at the X locations.
            Must match X in length.

        n_restarts_optimizer : non-negative integer
            The number of restarts to permit in the GPR. Look to scikit-learn's
            GPR implementation for more detail as it is passed through.

        normalize_y : boolean
            Whether to normalize the scale of y to improve fitting quality.
            See scikit-learn's GPR implementation for more detail.
    """
    gpr = GPR(kernel,
              n_restarts_optimizer=n_restarts_optimizer,
              copy_X_train=False,
              normalize_y=normalize_y)
    gpr.fit(X[sample_indices, :], y[sample_indices, :])
    return gpr
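The docstring describes this function as a single worker payload for the rBCM's parallel fit, but the excerpt does not show the dispatch. A plausible sketch using joblib (the partitioning scheme and joblib itself are assumptions; GPR is taken to be sklearn's GaussianProcessRegressor):

import numpy as np
from joblib import Parallel, delayed
from sklearn.gaussian_process.kernels import RBF

X = np.random.rand(1000, 3)
y = np.sin(X.sum(axis=1, keepdims=True))   # kept 2-D to match y[sample_indices, :]
partitions = np.array_split(np.arange(X.shape[0]), 4)

# One independent GPR expert per disjoint partition, fit in parallel.
experts = Parallel(n_jobs=4)(
    delayed(fit)(RBF(1.0), idx, X, y, n_restarts_optimizer=0, normalize_y=True)
    for idx in partitions
)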
Example #6
def fix_patch(c, w, h, patch_size, corr_img, recover_img):
    # Inpaint the zero-valued pixels of one patch of channel c by fitting a GP
    # on the known pixel coordinates and writing predictions into recover_img.
    width = recover_img.shape[1]
    height = recover_img.shape[2]
    g = GaussianProcessRegressor(kernel=RBF(patch_size))
    # Clamp the patch offsets so the patch stays inside the image bounds
    # (the original computed patch_size - width, which goes negative).
    if w + patch_size >= width:
        w = width - patch_size
    if h + patch_size >= height:
        h = height - patch_size
    channel_img = corr_img[c, w:w + patch_size, h:h + patch_size]
    noise_mask = channel_img == 0
    sample_coordinates = np.nonzero(~noise_mask)
    if len(sample_coordinates[0]) == 0:
        return fix_patch(c, w, h, patch_size * 2, corr_img, recover_img)
    # if len(sample_coordinates[0]) < 2 * patch_size:
    #     return fix_patch(c, w, h, patch_size * 2, recover_img)
    train_x = np.array(sample_coordinates).transpose()
    train_y = channel_img[sample_coordinates]
    g.fit(train_x, train_y)
    predict_coordinates = np.nonzero(noise_mask)
    predict_x = np.array(predict_coordinates).transpose()
    predict_y = g.predict(predict_x)
    recover_img[c, w:w + patch_size,
                h:h + patch_size][predict_coordinates] = predict_y
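fix_patch inpaints the zero-valued (missing) pixels of one channel patch by fitting a GP on the coordinates of the known pixels and predicting the missing ones into recover_img in place. A driver loop over a whole image might look like the sketch below; the channel-first layout and the zero-as-missing convention come from the function, the rest is assumed:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor  # needed by fix_patch above
from sklearn.gaussian_process.kernels import RBF

patch_size = 16
corr_img = np.random.rand(3, 64, 64)                    # corrupted image, channel-first
corr_img[np.random.rand(*corr_img.shape) < 0.3] = 0     # zeros mark missing pixels
recover_img = corr_img.copy()

for c in range(corr_img.shape[0]):
    for w in range(0, corr_img.shape[1], patch_size):
        for h in range(0, corr_img.shape[2], patch_size):
            fix_patch(c, w, h, patch_size, corr_img, recover_img)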
Example #7
 def __init__(self,
              kernel=None,
              alpha=1e-10,
              optimizer='fmin_l_bfgs_b',
              n_restarts_optimizer=0,
              normalize_y=False,
              copy_X_train=True,
              random_state=None):
     self._hyperparams = {
         'kernel': kernel,
         'alpha': alpha,
         'optimizer': optimizer,
         'n_restarts_optimizer': n_restarts_optimizer,
         'normalize_y': normalize_y,
         'copy_X_train': copy_X_train,
         'random_state': random_state
     }
     self._wrapped_model = SKLModel(**self._hyperparams)
Example #8
    "K-Neighbors",
    "Radius Neighbors",
    "MLP",
    "Decision Tree",
    "Extra Tree",
    "SVR"
]

classifiers = [
    RandomForestRegressor(n_estimators=200, n_jobs=5,
                          random_state=randomstate),
    ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate),
    # GradientBoostingRegressor(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    # HistGradientBoostingClassifier(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    AdaBoostRegressor(n_estimators=200, random_state=randomstate),
    GaussianProcessRegressor(normalize_y=True),
    ARDRegression(),
    # HuberRegressor(),   # epsilon:  greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(
        random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(
        weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
    MLPRegressor(max_iter=10000000, random_state=randomstate),
    DecisionTreeRegressor(
        random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    ExtraTreeRegressor(random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
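The excerpt pairs a list of display names (its variable name is cut off at the top; it is referred to as names below) with a list of instantiated regressors stored, somewhat confusingly, under the variable name classifiers. A typical way to use such parallel lists, assuming X and y hold the features and regression target:

from sklearn.model_selection import cross_val_score

# names / classifiers are the parallel lists from the excerpt (truncated above);
# X and y are assumed to exist.
for name, reg in zip(names, classifiers):
    scores = cross_val_score(reg, X, y, cv=5, scoring='neg_mean_absolute_error')
    print(f'{name:20s} MAE: {-scores.mean():.3f} +/- {scores.std():.3f}')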
Example #9
			'ElasticNetCV':ElasticNetCV(),
			'EmpiricalCovariance':EmpiricalCovariance(),
			'ExtraTreeClassifier':ExtraTreeClassifier(),
			'ExtraTreeRegressor':ExtraTreeRegressor(),
			'ExtraTreesClassifier':ExtraTreesClassifier(),
			'ExtraTreesRegressor':ExtraTreesRegressor(),
			'FactorAnalysis':FactorAnalysis(),
			'FastICA':FastICA(),
			'FeatureAgglomeration':FeatureAgglomeration(),
			'FunctionTransformer':FunctionTransformer(),
			'GMM':GMM(),
			'GaussianMixture':GaussianMixture(),
			'GaussianNB':GaussianNB(),
			'GaussianProcess':GaussianProcess(),
			'GaussianProcessClassifier':GaussianProcessClassifier(),
			'GaussianProcessRegressor':GaussianProcessRegressor(),
			'GaussianRandomProjection':GaussianRandomProjection(),
			'GenericUnivariateSelect':GenericUnivariateSelect(),
			'GradientBoostingClassifier':GradientBoostingClassifier(),
			'GradientBoostingRegressor':GradientBoostingRegressor(),
			'GraphLasso':GraphLasso(),
			'GraphLassoCV':GraphLassoCV(),
			'HuberRegressor':HuberRegressor(),
			'Imputer':Imputer(),
			'IncrementalPCA':IncrementalPCA(),
			'IsolationForest':IsolationForest(),
			'Isomap':Isomap(),
			'KMeans':KMeans(),
			'KNeighborsClassifier':KNeighborsClassifier(),
			'KNeighborsRegressor':KNeighborsRegressor(),
			'KernelCenterer':KernelCenterer(),
Example #10
if __name__ == '__main__':
    # initial parameters
    #  sklearn will adjust these parameters based on data
    #  see gpr.kernel_
    length = 1.5
    period = 2 * np.pi
    noise_level = .6
    # define problem
    x, y = generate_data()  # available data
    finex = np.linspace(min(x), max(x), 77)  # prediction locations
    # krige
    #  step 1: define correlation function
    kernel = ExpSineSquared(length_scale=length, periodicity=period) +\
             WhiteKernel(noise_level)
    #  step 2: incorporate available data
    gpr = GaussianProcessRegressor(kernel=kernel)
    gpr.fit(x[:, None], y)
    print(gpr.kernel_)
    #  step 3: make predictions
    fineym, fineye = gpr.predict(finex[:, None], return_std=True)

    # show results
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    show_data(ax, x, y, finex, myfunc)
    show_krig(ax, finex, fineym, fineye)
    ax.legend()
    plt.show()
# end __main__
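The script above relies on helper functions (generate_data, myfunc, show_data, show_krig) that are not included in the excerpt. A self-contained sketch of the same kriging workflow, with a toy generate_data stand-in (an assumption; the project's actual helpers may differ):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, WhiteKernel

def generate_data(n=40, noise=0.3, seed=0):
    # Toy stand-in: noisy samples of a periodic signal.
    rng = np.random.default_rng(seed)
    x = rng.uniform(0, 4 * np.pi, n)
    y = np.sin(x) + noise * rng.standard_normal(n)
    return x, y

x, y = generate_data()
finex = np.linspace(x.min(), x.max(), 77)

kernel = ExpSineSquared(length_scale=1.5, periodicity=2 * np.pi) + WhiteKernel(0.6)
gpr = GaussianProcessRegressor(kernel=kernel)
gpr.fit(x[:, None], y)
fineym, fineye = gpr.predict(finex[:, None], return_std=True)

plt.plot(x, y, 'k.', label='data')
plt.plot(finex, fineym, label='GP mean')
plt.fill_between(finex, fineym - 2 * fineye, fineym + 2 * fineye, alpha=0.3, label='+/- 2 std')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()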
Example #11
                                       test_size=0.2,
                                       random_state=np.random.randint(1, 1000))

# Separate output from inputs
y_train = train_set['time_to_failure']
x_train_seg = train_set['segment_id']
x_train = train_set.drop(['time_to_failure', 'segment_id'], axis=1)

y_test = test_set['time_to_failure']
x_test_seg = test_set['segment_id']
x_test = test_set.drop(['time_to_failure', 'segment_id'], axis=1)

# y_train = np.around(y_train.values, decimals=2)

# mlpReg = MLPRegressor(verbose=True, tol=0.0001, max_iter=200000, n_iter_no_change=10000, hidden_layer_sizes=(200,))
# Instantiate a Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(x_train, y_train)

# Wrap the model in a ModelHolder, pickle it to disk, then reload it
mh = ModelHolder(gp, most_dependent_columns)
mh.save(model_name)
gp = None
mh_new = load_model(model_name)
gp, most_dependent_columns = mh_new.get()
y_pred = gp.predict(x_test)
# y_pred = pd.Series(y_pred).apply(lambda x: float(x / 10))

print('MAE for Gaussian Process Regressor', mean_absolute_error(y_test, y_pred))