Example #1
def test_only_score_contains_sample_weight():
    mlflow.sklearn.autolog()

    from sklearn.gaussian_process import GaussianProcessRegressor

    assert "sample_weight" not in _get_arg_names(GaussianProcessRegressor.fit)
    assert "sample_weight" in _get_arg_names(GaussianProcessRegressor.score)

    mock_obj = mock.Mock()

    def mock_score(self, X, y, sample_weight=None):  # pylint: disable=unused-argument
        mock_obj(X, y, sample_weight)
        return 0

    assert inspect.signature(
        GaussianProcessRegressor.score) == inspect.signature(mock_score)

    GaussianProcessRegressor.score = mock_score
    model = GaussianProcessRegressor()
    X, y = get_iris()

    with mlflow.start_run() as run:
        model.fit(X, y)
        mock_obj.assert_called_once_with(X, y, None)

    run_id = run.info.run_id
    params, metrics, tags, artifacts = get_run_data(run_id)
    assert params == truncate_dict(
        stringify_dict_values(model.get_params(deep=True)))
    assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
    assert tags == get_expected_class_tags(model)
    assert MODEL_DIR in artifacts
    assert_predict_equal(load_model_by_run_id(run_id), model, X)
Example #2
def GPRTraining(XEstimate, XValidate, Parameters, class_labels):
    kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-05, 100000.0))
    #clf = GaussianProcessClassifier(kernel=kernel, n_restarts_optimizer=1)
    #clf = GaussianProcessRegressor(kernel= RBF(length_scale=1.0), optimizer=None)
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    clf = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=1)
    clf.fit(XEstimate, class_labels)
    Yvalidate = clf.predict(XValidate)
    EstParameters = clf.get_params()
    
    return {"Yvalidate": Yvalidate,
            "EstParameters": EstParameters,
            "clf": clf}
def makeGaussianProcess():
    global y_t_pred, result
    prefix = "%s_GP_FULL" % (name)
    #kernel = RBF(1e1,(1e-5,1e7))
    kernel = RationalQuadratic()  #(1e1,(1e-5,1e7))
    #kernel = ExpSineSquared()#(1e1,(1e-5,1e7))
    model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
    x1 = x[:, 3:6:2]
    x_t1 = x_t[:, 3:6:2]
    y_t_pred = model.fit(x1, y).predict(x_t1)
    r = model.score(x1, y)
    print("score r = %s" % r)
    print("Coefficients: %s" % model.get_params())
    #print("Highest Coefficients: %s" % str(sorted(model.get_params(), key=lambda x: -x)))
    print(str(
        (model.kernel_, model.log_marginal_likelihood(model.kernel_.theta))))
    return prefix, model
# kern = gp.kernels.RBF() + gp.kernels.WhiteKernel()
model = GaussianProcessRegressor(
    kernel=kern,  # kernel instance, default=None
    alpha=0.01,  # float or array-like of shape (n_samples,), default=1e-10
    optimizer="fmin_l_bfgs_b",  # "fmin_l_bfgs_b" or callable, default="fmin_l_bfgs_b"
    n_restarts_optimizer=0,  # int, default=0
    normalize_y=False,  # bool, default=False
    copy_X_train=True,  # bool, default=True
    random_state=None,  # int or RandomState, default=None
)
model.fit(train_x, train_y)
y_pred, y_std = model.predict(x.reshape(-1, 1), return_std=True)
log_marginal_likelihood = model.log_marginal_likelihood()  # log marginal likelihood
params = model.get_params()  # get the configured parameters (dict)
scores = model.score(train_x, train_y)  # coefficient of determination R^2
# params = model.set_params()  # set the configured parameters (dict)
k_samples = model.sample_y(train_x, n_samples=5)  # draw 5 random function samples from the posterior

X_train = model.X_train_
y_train = model.y_train_
kernel = model.kernel_  # kernel used for prediction (optimized, so it differs from the initially set parameters)
L = model.L_
alpha = model.alpha_
log_marginal_likelihood_value = model.log_marginal_likelihood_value_  # log marginal likelihood

# plot
fig = plt.figure(figsize=(6, 4))
ax1 = fig.add_subplot(111)
for i in range(k_samples.shape[1]):
Example #5
def fitGaussianProc(patDXdTdata, patAvgXdata, params):
  '''
  Fits a GP on the change data (x, dx/dt)

  Parameters
  ----------
  patDXdTdata
  patAvgXdata
  params : dict, must contain the key 'lengthScaleFactors'

  Returns
  -------
  x_pred, dXdT_pred, sigma_pred, gpList, posteriorSamples

  '''

  # Mesh the input space for evaluations of the real function, the prediction and
  # its MSE
  assert(CTL == 1)
  nrBiomk = patDXdTdata.shape[0]
  #minX = np.amin(patAvgXdata, axis=0)
  #maxX = np.amax(patAvgXdata, axis=0)
  minX = np.array([np.nanmin(patAvgXdata[b], axis=0) for b in range(nrBiomk)])
  maxX = np.array([np.nanmax(patAvgXdata[b], axis=0) for b in range(nrBiomk)])
  assert not any(np.isnan(minX))
  assert not any(np.isnan(maxX))

  intervalSize = maxX-minX
  minX -= intervalSize/0.5
  maxX += intervalSize/0.5
  
  #print minX.shape, maxX.shape
  nrPointsToEval = 5000
  x_pred = np.zeros((nrPointsToEval, nrBiomk),float)
  dXdT_pred = np.zeros((nrPointsToEval, nrBiomk),float)
  sigma_pred = np.zeros((nrPointsToEval, nrBiomk),float)
  nrSamples = 100
  posteriorSamples = np.zeros((nrSamples, nrPointsToEval, nrBiomk),float)

  # print(avgXdata.shape, diag.shape)
  # print(avgXdata[diag == CTL,:].shape)
  # ctlXMean = np.nanmean(avgXdata[diag == CTL,:], axis = 0)
  # ctlXStd = np.nanstd(avgXdata[diag == CTL,:], axis = 0)

  # ctldXdTMean = np.nanmean(dXdTdata[diag == CTL,:], axis = 0)
  # ctldXdTStd = np.nanstd(dXdTdata[diag == CTL,:], axis = 0)

  # allXMean = np.nanmean(avgXdata, axis = 0)
  # allXStd = np.nanstd(avgXdata, axis = 0)

  # alldXdTMean = np.nanmean(dXdTdata, axis = 0)
  # alldXdTStd = np.nanstd(dXdTdata, axis = 0)

  patXMean = np.array([np.nanmean(patAvgXdata[b], axis=0) for b in range(nrBiomk)])
  patXStd = np.array([np.nanstd(patAvgXdata[b], axis=0) for b in range(nrBiomk)])

  patdXdTMean = np.array([np.nanmean(patDXdTdata[b], axis=0) for b in range(nrBiomk)])
  patdXdTStd = np.array([np.nanstd(patDXdTdata[b], axis=0) for b in range(nrBiomk)])

  gpList = []


  for b in range(nrBiomk):
    points = np.linspace(minX[b], maxX[b], nrPointsToEval)
    #print points.shape

    X = patAvgXdata[b]
    Y = patDXdTdata[b]
    notNanInd = np.logical_not(np.isnan(X))
    X = X[notNanInd]
    Y = Y[notNanInd]

    X = X.reshape(-1,1)
    Y = Y.reshape(-1,1)

    # X = (X - allXMean[b]) / allXStd[b] # standardizing the inputs and outputs
    # Y = (Y - alldXdTMean[b]) / alldXdTStd[b]
    # minX[b] = (minX[b] - allXMean[b]) / allXStd[b]
    # maxX[b] = (maxX[b] - allXMean[b]) / allXStd[b]

    X = (X - patXMean[b]) / patXStd[b]  # standardizing the inputs and outputs
    # Y = (Y - patdXdTMean[b]) / patdXdTStd[b]
    Y = Y  / patdXdTStd[b]
    minX[b] = (minX[b] - patXMean[b]) / patXStd[b]
    maxX[b] = (maxX[b] - patXMean[b]) / patXStd[b]

    #print 'Xshape, Yshape', X.shape, Y.shape
    lower, upper = np.abs(1/np.max(X)), np.abs(1/(np.min(X)+1e-6))
    if lower > upper:
      lower, upper = upper, lower
    mid = 1/np.abs(np.mean(X))

    # print("X", X[:20],'Y', Y[:20])
    # print(minX, maxX)


    #lengthScale = (np.max(X)-np.min(X))
    lengthScale = params['lengthScaleFactors'][b] * (np.max(X) - np.min(X))/2
    estimNoise = np.var(Y)/2 # this should be variance, as it is placed as is on the diagonal of the kernel, which is a covariance matrix
    #estimAlpha = np.ravel((np.std(Y))**2)
    #estimAlpha = np.var(Y)/2
    estimAlpha = np.std(Y)*2
    boundsFactor = 2.0
    #estimAlpha = 0
    # need to specify bounds as the lengthScale is optimised in the fit (see the sketch after this function)
    rbfKernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(length_scale=lengthScale, length_scale_bounds=(float(lengthScale)/boundsFactor, 1*lengthScale))
    whiteKernel = ConstantKernel(1.0, constant_value_bounds="fixed") * WhiteKernel(noise_level=estimNoise, noise_level_bounds=(float(estimNoise)/boundsFactor, boundsFactor*estimNoise))
    #rbfKernel = 1 * RBF(length_scale=lengthScale)
    #whiteKernel = 1 * WhiteKernel(noise_level=estimNoise)
    kernel = rbfKernel + whiteKernel
    #kernel = 1.0 * RBF(length_scale=lengthScale)
    print('\nbiomk %d  lengthScale %f  noise %f alpha %f'% (b, lengthScale, estimNoise, estimAlpha))
    #print estimAlpha.shape
    normalizeYflag = False
    #normalizeYflag = True

    gp = GaussianProcessRegressor(kernel=rbfKernel, alpha=estimAlpha, optimizer='fmin_l_bfgs_b', n_restarts_optimizer=100, normalize_y=normalizeYflag)

    #gp = GaussianProcessRegressor(kernel=rbfKernel, alpha=estimAlpha, optimizer=None, n_restarts_optimizer=100, normalize_y=True)

    assert not any(np.isnan(X))
    assert not any(np.isnan(Y))
    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, Y)
    print("optimised kernel", gp.kernel_)#, "  theta", gp.kernel_.theta, " bounds", gp.kernel_.bounds)

    #gpNonOpt = GaussianProcessRegressor(kernel=rbfKernel, alpha=estimAlpha, optimizer=None, normalize_y=False)
    #gpNonOpt.fit(X,Y)
    #print("non-optimised kernel", gpNonOpt.kernel_)#,  "  theta", gpNonOpt.kernel_.theta, " bounds", gpNonOpt.kernel_.bounds)

    #gp = gpNonOpt

    # Make the prediction on the meshed x-axis (ask for Cov matrix as well)
    x_pred[:,b] = np.linspace(minX[b], maxX[b], nrPointsToEval)
    assert not any(np.isnan(x_pred[:,b]))
    dXdT_predCurr, cov_matrix = gp.predict(x_pred[:,b].reshape(-1,1), return_cov=True)

    # make sure dXdT is not too low, otherwise truncate the [minX, maxX] interval
    dXdTthresh = 1e-10
    tooLowMask = np.abs(np.ravel(dXdT_predCurr)) < dXdTthresh
    print(tooLowMask.shape)
    if np.sum(tooLowMask) > nrPointsToEval/10:
      print("Warning: dXdT is too low, will restrict the [minX, maxX] interval")
      goodIndicesMask = np.logical_not(tooLowMask)
      #print(x_pred.shape, goodIndicesMask.shape)
      #print(x_pred[goodIndicesMask, b])
      minX[b] = min(x_pred[goodIndicesMask,b])
      maxX[b] = max(x_pred[goodIndicesMask,b])
      x_pred[:, b] = np.linspace(minX[b], maxX[b], nrPointsToEval)
      dXdT_predCurr, cov_matrix = gp.predict(x_pred[:,b].reshape(-1,1), return_cov=True)


    MSE = np.diagonal(cov_matrix)

    dXdT_pred[:,b] = np.ravel(dXdT_predCurr)
    sigma_pred[:,b] = np.ravel(np.sqrt(MSE))
    samples = gp.sample_y(x_pred[:,b].reshape(-1,1), n_samples=nrSamples, random_state=0)
    posteriorSamples[:,:,b] = np.squeeze(samples).T

    # renormalize the Xs and Ys
    # x_pred[:,b] = x_pred[:,b] * allXStd[b] + allXMean[b]
    # dXdT_pred[:,b] = dXdT_pred[:,b] * alldXdTStd[b] + alldXdTMean[b]
    # sigma_pred[:,b] = sigma_pred[:,b] * alldXdTStd[b]
    # posteriorSamples[:,:,b] = posteriorSamples[:,:,b]*alldXdTStd[b] + alldXdTMean[b]

    # renormalize the Xs and Ys
    # x_pred[:, b] = x_pred[:, b] * patXStd[b] + patXMean[b]
    # dXdT_pred[:, b] = dXdT_pred[:, b] * patdXdTStd[b] + patdXdTMean[b]
    # sigma_pred[:, b] = sigma_pred[:, b] * patdXdTStd[b]
    # posteriorSamples[:, :, b] = posteriorSamples[:, :, b] * patdXdTStd[b] + patdXdTMean[b]

    x_pred[:, b] = x_pred[:, b] * patXStd[b] + patXMean[b]
    dXdT_pred[:, b] = dXdT_pred[:, b] * patdXdTStd[b]
    sigma_pred[:, b] = sigma_pred[:, b] * patdXdTStd[b]
    posteriorSamples[:, :, b] = posteriorSamples[:, :, b] * patdXdTStd[b]

    # diagCol = plotTrajParams['diagColors']
    # fig = pl.figure(1)
    # nrDiags = np.unique(diag).shape[0]
    # for diagNr in range(1, nrDiags + 1):
    #   print(avgXdata.shape, diag.shape, dXdTdata.shape, diagCol, diagNr)
    #   pl.scatter(avgXdata[diag == diagNr, b], dXdTdata[diag == diagNr, b], color = diagCol[diagNr - 1])
    #
    # modelCol = 'r' # red
    # pl.plot(x_pred[:, b], dXdT_pred[:, b], '%s-' % modelCol, label = u'Prediction')
    # pl.fill(np.concatenate([x_pred[:, b], x_pred[::-1, b]]), np.concatenate(
    #   [dXdT_pred[:, b] - 1.9600 * sigma_pred[:, b], (dXdT_pred[:, b] + 1.9600 * sigma_pred[:, b])[::-1]]), alpha = .5,
    #         fc = modelCol, ec = 'None', label = '95% confidence interval')
    # for s in range(nrSamples):
    #   pl.plot(x_pred[:, b], posteriorSamples[s, :, b])
    # fig.show()

    gpParams = gp.get_params(deep=True)
    #print 'kernel', gp.kernel
    #print 'gpParams', gpParams

    gpList.append(gp)

  #print(adsa)

  return x_pred, dXdT_pred, sigma_pred, gpList, posteriorSamples
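The bounds set on the RBF and WhiteKernel above matter because GaussianProcessRegressor optimises the kernel hyperparameters during fit by maximising the log-marginal likelihood. Below is a minimal sketch (with made-up data and a hypothetical starting length scale l0) contrasting bounded and fixed hyperparameters:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

rng = np.random.RandomState(0)
X = rng.uniform(-2, 2, 30).reshape(-1, 1)
y = np.sin(3 * X).ravel() + 0.1 * rng.randn(30)

l0 = 1.0  # hypothetical initial length scale
# length scale constrained to [l0/2, 2*l0]; amplitude kept fixed
bounded = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(
    length_scale=l0, length_scale_bounds=(l0 / 2, 2 * l0))
# length scale frozen at l0: the optimiser cannot move it
fixed = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(
    length_scale=l0, length_scale_bounds="fixed")

gp_bounded = GaussianProcessRegressor(kernel=bounded).fit(X, y)
gp_fixed = GaussianProcessRegressor(kernel=fixed).fit(X, y)

# kernel_ holds the fitted hyperparameters; kernel keeps the initial ones
print(gp_bounded.kernel_)  # optimised length scale, within [l0/2, 2*l0]
print(gp_fixed.kernel_)    # length scale stays at l0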
Example #6
class GaussianProcessRegressionModel(Model):
    """
    Gaussian Process Regression Model

    """
    def __init__(self,
                 describer,
                 kernel_category='RBF',
                 restarts=10,
                 **kwargs):
        """

        Args:
            describer (Describer): Describer to convert
                input object to descriptors.
            kernel_category (str): Name of kernel from
                sklearn.gaussian_process.kernels. Default to 'RBF', i.e.,
                squared exponential.
            restarts (int): The number of restarts of the optimizer for
                finding the kernel’s parameters which maximize the
                log-marginal likelihood.
            kwargs: kwargs to be passed to kernel object, e.g. length_scale,
                length_scale_bounds.
        """
        self.describer = describer
        kernel = getattr(kernels, kernel_category)(**kwargs)
        self.model = GaussianProcessRegressor(kernel=kernel,
                                              n_restarts_optimizer=restarts)
        self._xtrain = None
        self._xtest = None

    def fit(self, inputs, outputs, override=False):
        """
        Args:
            inputs (list): List of input training objects.
            outputs (list): List/Array of output values
                (supervisory signals).
            override: (bool) Whether to calculate the feature
                vectors from given inputs. Default to False. Set to True if
                you want to retrain the model with a different set of
                training inputs.
        """
        if self._xtrain is None or override:
            xtrain = self.describer.describe_all(inputs)
        else:
            warnings.warn("Feature vectors retrieved from cache "
                          "and input training objects ignored. "
                          "To override the old cache with feature vectors "
                          "of new training objects, set override=True.")
            xtrain = self._xtrain
        self.model.fit(xtrain, outputs)
        self._xtrain = xtrain

    def predict(self, inputs, override=False, **kwargs):
        """
        Args:
            inputs (List): List of input testing objects.
            override: (bool) Whether to calculate the feature
                vectors from given inputs. Default to False. Set to True if
                you want to test the model with a different set of testing inputs.
            kwargs: kwargs to be passed to predict method, e.g.
                return_std, return_cov.
        Returns:
            Predicted output array from inputs.
        """
        if self._xtest is None or override:
            xtest = self.describer.describe_all(inputs)
        else:
            warnings.warn("Feature vectors retrieved from cache "
                          "and input testing objects ignored. "
                          "To override the old cache with feature vectors "
                          "of new testing objects, set override=True.")
            xtest = self._xtest
        self._xtest = xtest
        return self.model.predict(xtest, **kwargs)

    @property
    def params(self):
        return self.model.get_params()

    def save(self, model_fname):
        joblib.dump(self.model, '%s.pkl' % model_fname)

    def load(self, model_fname):
        self.model = joblib.load(model_fname)
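A minimal usage sketch for the class above. The SumDescriber below is a hypothetical stand-in for a real describer (anything exposing describe_all that maps input objects to feature vectors), and the sketch assumes the Model base class can be instantiated this way:

import numpy as np

class SumDescriber:
    """Hypothetical describer: maps each input (a list of numbers) to [sum, length]."""
    def describe_all(self, objs):
        return np.array([[sum(o), len(o)] for o in objs], dtype=float)

inputs = [[1, 2], [2, 3, 4], [0, 1, 1, 5]]
outputs = [3.0, 9.0, 7.0]

model = GaussianProcessRegressionModel(describer=SumDescriber(),
                                       kernel_category='RBF',
                                       restarts=5,
                                       length_scale=1.0)
model.fit(inputs, outputs)                  # feature vectors computed and cached
preds = model.predict([[2, 2], [1, 4, 4]])  # feature vectors computed for the test objects
print(preds, model.params)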
Example #7
File: gp.py  Project: Quickblink/rl-hpo
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

x = np.linspace(0, 1, 100)
x1 = np.linspace(-5, 0, 100)
x2 = np.linspace(0, 5, 100)

K = norm.pdf(10 * np.abs(np.subtract(*np.meshgrid(x, x))))
plt.figure()
for _ in range(10):
    plt.plot(x, multivariate_normal.rvs(np.zeros(100), K))

#X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
kernel = RBF(0.1)  #DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel, random_state=0,
                               optimizer=None)  #.fit(X, y)

plt.figure()
for i in range(10):
    y1 = gpr.sample_y(x[:, None], random_state=i).squeeze()
    #gpr.fit(x1[:, None], y1)
    plt.plot(x, y1)
    #plt.plot(x2, gpr.sample_y(x2[:, None], random_state=i).squeeze())

plt.show()

#print(gpr.score(X, y))

#print(gpr.predict(X[:2,:], return_std=True))

print(gpr.get_params())
Example #8
class SklearnModel(BaseEstimator, TransformerMixin):
    """
    Class to wrap any sklearn estimator, and provide some new dataframe functionality

    Args:
        model: (str), string denoting the name of an sklearn estimator object, e.g. KernelRidge

        kwargs: keyword pairs of values to include for model, e.g. for KernelRidge can specify kernel, alpha, gamma values

    Methods:
        fit: method that fits the model parameters to the provided training data
            Args:
                X: (pd.DataFrame), dataframe of X features

                y: (pd.Series), series of y target data

            Returns:
                fitted model

        predict: method that evaluates model on new data to give predictions
            Args:
                X: (pd.DataFrame), dataframe of X features

                as_frame: (bool), whether to return data as pandas dataframe (else numpy array)

            Returns:
                series or array of predicted values

        help: method to output key information on class use, e.g. methods and parameters
            Args:
                None

            Returns:
                None, but outputs help to screen
    """
    def __init__(self, model, **kwargs):
        if model == 'XGBoostRegressor':
            self.model = xgboost.XGBRegressor(**kwargs)
        elif model == 'GaussianProcessRegressor':
            kernel = kwargs['kernel']
            kernel = _make_gpr_kernel(kernel_string=kernel)
            del kwargs['kernel']
            self.model = GaussianProcessRegressor(kernel=kernel, **kwargs)
        else:
            self.model = dict(sklearn.utils.all_estimators())[model](**kwargs)

    def fit(self, X, y):
        return self.model.fit(X, y)

    def predict(self, X, as_frame=True):
        if as_frame:
            return pd.DataFrame(self.model.predict(X),
                                columns=['y_pred']).squeeze()
        else:
            return self.model.predict(X).ravel()

    def get_params(self, deep=True):
        return self.model.get_params(deep)

    def help(self):
        print('Documentation for', self.model)
        pprint(dict(inspect.getmembers(self.model))['__doc__'])
        print('\n')
        print('Class methods for,', self.model)
        pprint(dict(inspect.getmembers(self.model,
                                       predicate=inspect.ismethod)))
        print('\n')
        print('Class attributes for,', self.model)
        pprint(self.model.__dict__)
        return
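A minimal usage sketch of the wrapper above, going through the generic branch (the estimator name is looked up via sklearn.utils.all_estimators()); the GaussianProcessRegressor branch is skipped here because it relies on the _make_gpr_kernel helper, whose kernel-string format is not shown:

import numpy as np
import pandas as pd

X = pd.DataFrame(np.random.rand(50, 3), columns=['f1', 'f2', 'f3'])
y = pd.Series(np.random.rand(50), name='target')

# 'KernelRidge' resolves through dict(sklearn.utils.all_estimators())
model = SklearnModel(model='KernelRidge', alpha=0.5, kernel='rbf')
model.fit(X, y)
y_pred = model.predict(X, as_frame=True)  # pandas Series named 'y_pred'
print(y_pred.head())
print(model.get_params())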
Example #9
     [4.29724834, -13.63014662, 4.96232973],
     [4.59071543, -11.92423152, 4.81544906],
     [4.2949693, -12.87675871, 5.16477966],
     [4.10585292, -13.50090225, 4.84599718],
     [6.91792365, -11.29063318, -5.07800206],
     [6.78145595, -11.1925924, -7.11770544],
     [6.70614104, -12.32381859, -14.9805829],
     [7.81653159, -12.01413346, -8.49294406],
     [6.17192181, -12.4810872, -7.79349506],
     [4.02251458, -10.1751462, 3.58618972],
     [6.08898904, -11.6674992, 0.28110092],
     [6.20738109, -12.08157436, 0.05359656],
     [5.61603719, -12.39538706, 1.1257978],
     [5.84118061, -12.58242897, 1.78389973]]
y = [[0.957822501659], [0.958218336105], [0.95780223608], [0.960562646389],
     [0.964764118195],
     [
         0.999685406685, 0.999685406685, 0.999685406685, 0.999675273895,
         0.999685406685
     ],
     [
         0.985243976116, 0.968725013042, 0.958451747894, 0.954782373101,
         0.958218336105
     ]]
gp.fit(X, y)

params = gp.get_params()

with open('gpParams.json', 'w') as fp:
    json.dump(params, fp)
Example #10
def gpr_heightmap(init_values,
                  x,
                  y,
                  kernel=None,
                  alpha=1e-10,
                  min_height=0,
                  max_height=255,
                  dtype=int):
    r"""
    Generate a heightmap using Gaussian process regression. The advantage of using this method over others to
    generate terrains lies in the capacity to add prior knowledge through the kernel and the given initial values.
    For instance, using an RBF kernel means that we want a smooth terrain instead of a bumpy one.
    Furthermore, it allows generating heightmaps which are not necessarily square; i.e. they can be rectangular.

    Warnings: this is pretty difficult to use effectively if the given data is not consistent. See `heigthmap_rbf` for
        a better way to generate heightmaps.

    Args:
        init_values (np.array[M,3]): list of `M` 3D points which correspond to the initial values used to fit
            the gaussian process.
        x (np.array[N], np.array[N,O]): If 1d array, it will compute the meshgrid. Otherwise, the resulting 2D array
            from the meshgrid is expected. This is used to predict the heightmap at the given points.
        y (np.array[O], np.array[N,O]): If 1d array, it will compute the meshgrid. Otherwise, the resulting 2D array
            from the meshgrid is expected. This is used to predict the heightmap at the given points.
        kernel (None, sklearn.gaussian_process.kernels.Kernel): "The kernel specifying the covariance function of
            the GP. If None is passed, the kernel '1.0 * RBF(1.0)' is used as default. Note that the kernel's
            hyperparameters are optimized during fitting" [2]
        alpha (float, array_like): "Value added to the diagonal of the kernel matrix during fitting. Larger values
            correspond to increased noise level in the observations. This can also prevent a potential numerical issue
            during fitting, by ensuring that the calculated values form a positive definite matrix. If an array is
            passed, it must have the same number of entries as the data used for fitting and is used as
            datapoint-dependent noise level. Note that this is equivalent to adding a WhiteKernel with c=alpha.
            Allowing to specify the noise level directly as a parameter is mainly for convenience and for consistency
            with Ridge." [2]
        min_height (int,float): lower bound; each value in the heightmap will be higher than or equal to this bound
        max_height (int,float): upper bound; each value in the heightmap will be lower than or equal to this bound
        dtype (int, float): type of the returned array for the heightmap

    Returns:
        np.array[N,O]: resulting 2D heightmap

    Examples:
        >>> # generate heightmap using gaussian process regression
        >>> x = np.array(range(256))
        >>> y = np.array(range(256))
        >>> N_init = 20
        >>> x_init = np.random.randint(low=x.min(), high=x.max(), size=N_init)
        >>> y_init = np.random.randint(low=y.min(), high=y.max(), size=N_init)
        >>> z_init = np.random.randint(low=0, high=20, size=N_init)
        >>> init_values = np.vstack((x_init, y_init, z_init)).T     # shape: Nx3
        >>> heightmap = gpr_heightmap(init_values, x, y)

    References:
        - [1] "Gaussian Processes for Machine Learning", Rasmussen and Williams, 2006
        - [2] Sklearn: https://scikit-learn.org/stable/modules/gaussian_process.html
    """
    # check given x and y
    if len(x.shape) == 1 and len(y.shape) == 1:
        x, y = np.meshgrid(x, y)
    if x.shape != y.shape:
        raise ValueError(
            "Expecting x and y to have the same shape, which should be the case if it is a meshgrid"
        )

    # compute the minimum distance between points
    N = len(init_values)
    min_dist = np.inf
    for i in range(N):
        for j in range(i + 1, N):
            dist = np.linalg.norm(init_values[i, :2] - init_values[j, :2])
            if dist < min_dist:
                min_dist = dist
    print("Min dist: {}".format(min_dist))

    # check initial values
    if not isinstance(init_values, np.ndarray):
        raise TypeError("Expecting init_values to be a numpy array")
    if init_values.shape[1] != 3:
        raise ValueError(
            "Expecting a numpy array of 3D points for init_values")

    # create gaussian process and fit on the given initial values
    if kernel is None:
        kernel = RBF(length_scale=np.sqrt(min_dist))
    gpr = GaussianProcessRegressor(kernel=kernel,
                                   alpha=alpha,
                                   normalize_y=True)
    gpr.fit(init_values[:, :2], init_values[:, 2])

    # predict the heightmap using GPR
    X = np.dstack((x, y)).reshape(-1, 2)
    heightmap = gpr.predict(X)
    heightmap = heightmap.reshape(x.shape)

    print("Params: {}".format(gpr.get_params()))

    # make sure the values of the heightmap are between the bounds (in-place), and is the correct type
    np.clip(heightmap, min_height, max_height, heightmap)
    heightmap = heightmap.astype(dtype)

    return heightmap
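A small sketch of the equivalence mentioned in the alpha description above: with the optimiser disabled, passing the noise through alpha or through a fixed WhiteKernel term yields the same posterior mean (the returned standard deviations differ, since the WhiteKernel also contributes variance at the test points):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

rng = np.random.RandomState(0)
X_train = rng.uniform(0, 5, 25).reshape(-1, 1)
y_train = np.sin(X_train).ravel() + 0.1 * rng.randn(25)
X_test = np.linspace(0, 5, 100).reshape(-1, 1)

noise = 0.05
# noise added to the diagonal of the training kernel via alpha
gp_alpha = GaussianProcessRegressor(kernel=RBF(1.0), alpha=noise,
                                    optimizer=None).fit(X_train, y_train)
# the same noise expressed as a fixed WhiteKernel term
gp_white = GaussianProcessRegressor(
    kernel=RBF(1.0) + WhiteKernel(noise, noise_level_bounds="fixed"),
    optimizer=None).fit(X_train, y_train)

mean_alpha = gp_alpha.predict(X_test)
mean_white = gp_white.predict(X_test)
print(np.allclose(mean_alpha, mean_white, atol=1e-6))  # posterior means agree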
Example #11
            count += 1
print(count / 400000)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
import sklearn.gaussian_process.kernels as kl

a1 = np.random.normal(1, 1.5, 50).reshape(50, 1)
# a2=np.random.normal(10.6, 8.6, 50).reshape(50,1)

# b=a1-np.random.random(5).reshape(5,1)
b = np.random.laplace(2, 1.1, 50).reshape(50, 1)
# plt.scatter(a1,b,marker = 'o', color = 'r', label='3', s = 15)
# plt.show()

gaussian = GaussianProcessRegressor(
    kernel=kl.RBF(5.0, length_scale_bounds='fixed'))
fiting = gaussian.fit(a1, b)

gaussian.get_params(True)

# c=np.linspace(a1.min()-0.1,a1.max()+0.1,50)
c = np.linspace(a1.min(), a1.max(), 20)
d = gaussian.predict(c.reshape(20, 1), return_std=True)
plt.scatter(a1, b, marker='o', color='r', label='3', s=15)
plt.plot(c, d[0])
plt.plot(c, d[0] + (d[1] * 200).reshape(20, 1))
plt.plot(c, d[0] - (d[1] * 200).reshape(20, 1))
plt.show()
Example #12
#dy1 += noise

#print 'test y', y
#print 'test dy', dyf
#print 'test x', X

# Instantiate a Gaussian Process model
gp = GaussianProcessRegressor(
    kernel=kernel,
    alpha=(dyf / y)**2,  #(dyf / y) ** 2,
    n_restarts_optimizer=500)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)

print('Parameters:', gp.get_params(deep=True))

print('Score:', gp.score(X, y))

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(x, return_std=True)
#print 'Sigma:', sigma

likel = y_pred - sigma
m = max(y_pred)
mp = [i for i, j in enumerate(y_pred) if j == m]
Mag400p = -((np.log10(y_pred[mp] * fm) - 31.4) / 0.4)
Mag400p_err = (-(np.log10(y_pred[mp] * fm) - 31.4) /
               0.4) - (-(np.log10(likel[mp] * fm) - 31.4) / 0.4)

print('Peak item, Mag, Mag-err, Peak:', mp, Mag400p, Mag400p_err, x[mp])
Example #13
    #y_sample_yo = gp.sample_y(Xtr_1, 1)
    try:
        gp.fit(Xtr, Ytr)
        print("marginal likelihood:", gp.log_marginal_likelihood())
        y_pred, y_sigma = gp.predict(Xtst, return_std=True)
        print(y_pred.shape)

        result_time = [g + 1 for g in range(D, r)]

        s = "time interval between "+str(result_time[0])+" and "+str(result_time[-1])+\
        " minutes\n window is "+str(D)
        plt.xlabel(s)
        ylab = labels[z]
        plt.ylabel(ylab)
        o = "Using "+str(gp.get_params()['kernel'])+" kernel\nwith "+str(total_samp)+" averaged training samples\nand "+str(r)+\
        " averaged test samples"
        plt.title(o)

        #ploting data
        #plt.plot(result_time, y_sample_yo, "c-", label= "kernel sample")
        plt.plot(result_time, Ycomp, "y-", label="training")
        plt.plot(result_time, y_pred.T[0], "g-", label="predicted")
        plt.plot(result_time, Ytst.T[0], "m-", label="real")
        plt.fill(np.concatenate([result_time, result_time[::-1]]),
                 np.concatenate([
                     y_pred - 1.96 * y_sigma, (y_pred + 1.96 * y_sigma)[::-1]
                 ]),
                 alpha=.5,
                 fc='b',
                 ec='none')