def test_only_score_contains_sample_weight():
    mlflow.sklearn.autolog()

    from sklearn.gaussian_process import GaussianProcessRegressor

    assert "sample_weight" not in _get_arg_names(GaussianProcessRegressor.fit)
    assert "sample_weight" in _get_arg_names(GaussianProcessRegressor.score)

    mock_obj = mock.Mock()

    def mock_score(self, X, y, sample_weight=None):  # pylint: disable=unused-argument
        mock_obj(X, y, sample_weight)
        return 0

    assert inspect.signature(GaussianProcessRegressor.score) == inspect.signature(mock_score)

    GaussianProcessRegressor.score = mock_score
    model = GaussianProcessRegressor()
    X, y = get_iris()

    with mlflow.start_run() as run:
        model.fit(X, y)
        mock_obj.assert_called_once_with(X, y, None)

    run_id = run.info.run_id
    params, metrics, tags, artifacts = get_run_data(run_id)
    assert params == truncate_dict(stringify_dict_values(model.get_params(deep=True)))
    assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
    assert tags == get_expected_class_tags(model)
    assert MODEL_DIR in artifacts
    assert_predict_equal(load_model_by_run_id(run_id), model, X)
def GPRTraining(XEstimate, XValidate, Parameters, class_labels):
    kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-05, 100000.0))
    # clf = GaussianProcessClassifier(kernel=kernel, n_restarts_optimizer=1)
    # clf = GaussianProcessRegressor(kernel=RBF(length_scale=1.0), optimizer=None)
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))  # overrides the RBF-only kernel above
    clf = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=1)
    clf.fit(XEstimate, class_labels)
    Yvalidate = clf.predict(XValidate)
    EstParameters = clf.get_params()
    return {"Yvalidate": Yvalidate,
            "EstParameters": EstParameters,
            "clf": clf}
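# Minimal usage sketch for GPRTraining, assuming the aliases the function relies
# on (C = ConstantKernel) and synthetic regression data; note that the
# Parameters argument is unused by the function body, so an empty dict is passed.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

rng = np.random.RandomState(0)
XEstimate = rng.rand(50, 2)                    # training inputs
class_labels = np.sin(XEstimate).sum(axis=1)   # regression targets, despite the name
XValidate = rng.rand(10, 2)                    # held-out inputs

result = GPRTraining(XEstimate, XValidate, Parameters={}, class_labels=class_labels)
print(result["Yvalidate"].shape)               # (10,)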
def makeGaussianProcess():
    global y_t_pred, result
    prefix = "%s_GP_FULL" % name
    # kernel = RBF(1e1, (1e-5, 1e7))
    kernel = RationalQuadratic()  # (1e1, (1e-5, 1e7))
    # kernel = ExpSineSquared()  # (1e1, (1e-5, 1e7))
    model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
    x1 = x[:, 3:6:2]
    x_t1 = x_t[:, 3:6:2]
    y_t_pred = model.fit(x1, y).predict(x_t1)
    r = model.score(x1, y)
    print("score r = %s" % r)
    print("Coefficients: %s" % model.get_params())
    # print("Highest Coefficients: %s" % str(sorted(model.get_params(), key=lambda x: -x)))
    print((model.kernel_, model.log_marginal_likelihood(model.kernel_.theta)))
    return prefix, model
# kern = gp.kernels.RBF() + gp.kernels.WhiteKernel()
model = GaussianProcessRegressor(
    kernel=kern,                # kernel instance, default=None
    alpha=0.01,                 # float or array-like of shape (n_samples,), default=1e-10
    optimizer="fmin_l_bfgs_b",  # "fmin_l_bfgs_b" or callable, default="fmin_l_bfgs_b"
    n_restarts_optimizer=0,     # int, default=0
    normalize_y=False,          # bool, default=False
    copy_X_train=True,          # bool, default=True
    random_state=None,          # int or RandomState, default=None
)
model.fit(train_x, train_y)
y_pred, y_std = model.predict(x.reshape(-1, 1), return_std=True)

log_marginal_likelihood = model.log_marginal_likelihood()  # log marginal likelihood
params = model.get_params()              # get the configured parameters (dict)
scores = model.score(train_x, train_y)   # coefficient of determination R^2
# params = model.set_params()            # set the parameters (dict)
k_samples = model.sample_y(train_x, n_samples=5)  # draw 5 random samples from the posterior

X_train = model.X_train_
y_train = model.y_train_
kernel = model.kernel_  # kernel used for prediction (optimized, so its parameters differ from the initial ones)
L = model.L_
alpha = model.alpha_
log_marginal_likelihood_value = model.log_marginal_likelihood_value_  # log marginal likelihood

# plot
fig = plt.figure(figsize=(6, 4))
ax1 = fig.add_subplot(111)
for i in range(k_samples.shape[1]):
    ax1.plot(train_x, k_samples[:, i])  # one posterior draw per curve
def fitGaussianProc(patDXdTdata, patAvgXdata, params):
    '''
    Fits a GP on the change data (x, dx/dt)

    Parameters
    ----------
    patDXdTdata : object array with one 1-D array of dx/dt values per biomarker
    patAvgXdata : object array with one 1-D array of x values per biomarker
    params : dict, must contain 'lengthScaleFactors'

    Returns
    -------
    x_pred, dXdT_pred, sigma_pred, gpList, posteriorSamples
    '''
    # Mesh the input space for evaluations of the real function, the prediction and
    # its MSE
    assert CTL == 1
    nrBiomk = patDXdTdata.shape[0]
    # minX = np.amin(patAvgXdata, axis=0)
    # maxX = np.amax(patAvgXdata, axis=0)
    minX = np.array([np.nanmin(patAvgXdata[b], axis=0) for b in range(nrBiomk)])
    maxX = np.array([np.nanmax(patAvgXdata[b], axis=0) for b in range(nrBiomk)])
    assert not any(np.isnan(minX))
    assert not any(np.isnan(maxX))
    intervalSize = maxX - minX
    minX -= intervalSize / 0.5
    maxX += intervalSize / 0.5

    nrPointsToEval = 5000
    x_pred = np.zeros((nrPointsToEval, nrBiomk), float)
    dXdT_pred = np.zeros((nrPointsToEval, nrBiomk), float)
    sigma_pred = np.zeros((nrPointsToEval, nrBiomk), float)
    nrSamples = 100
    posteriorSamples = np.zeros((nrSamples, nrPointsToEval, nrBiomk), float)

    patXMean = np.array([np.nanmean(patAvgXdata[b], axis=0) for b in range(nrBiomk)])
    patXStd = np.array([np.nanstd(patAvgXdata[b], axis=0) for b in range(nrBiomk)])
    patdXdTMean = np.array([np.nanmean(patDXdTdata[b], axis=0) for b in range(nrBiomk)])
    patdXdTStd = np.array([np.nanstd(patDXdTdata[b], axis=0) for b in range(nrBiomk)])

    gpList = []
    for b in range(nrBiomk):
        points = np.linspace(minX[b], maxX[b], nrPointsToEval)
        X = patAvgXdata[b]
        Y = patDXdTdata[b]
        notNanInd = np.logical_not(np.isnan(X))
        X = X[notNanInd]
        Y = Y[notNanInd]
        X = X.reshape(-1, 1)
        Y = Y.reshape(-1, 1)

        # standardize the inputs; only scale the outputs (no mean shift)
        X = (X - patXMean[b]) / patXStd[b]
        # Y = (Y - patdXdTMean[b]) / patdXdTStd[b]
        Y = Y / patdXdTStd[b]
        minX[b] = (minX[b] - patXMean[b]) / patXStd[b]
        maxX[b] = (maxX[b] - patXMean[b]) / patXStd[b]

        lower, upper = np.abs(1 / np.max(X)), np.abs(1 / (np.min(X) + 1e-6))
        if lower > upper:
            lower, upper = upper, lower
        mid = 1 / np.abs(np.mean(X))

        # lengthScale = (np.max(X) - np.min(X))
        lengthScale = params['lengthScaleFactors'][b] * (np.max(X) - np.min(X)) / 2
        # this should be a variance, as it is placed as-is on the diagonal of the
        # kernel, which is a covariance matrix
        estimNoise = np.var(Y) / 2
        # estimAlpha = np.ravel((np.std(Y)) ** 2)
        # estimAlpha = np.var(Y) / 2
        estimAlpha = np.std(Y) * 2
        boundsFactor = 2.0

        # need to specify bounds, as the lengthScale is optimised in the fit
        rbfKernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(
            length_scale=lengthScale,
            length_scale_bounds=(float(lengthScale) / boundsFactor, 1 * lengthScale))
        whiteKernel = ConstantKernel(1.0, constant_value_bounds="fixed") * WhiteKernel(
            noise_level=estimNoise,
            noise_level_bounds=(float(estimNoise) / boundsFactor, boundsFactor * estimNoise))
        # rbfKernel = 1 * RBF(length_scale=lengthScale)
        # whiteKernel = 1 * WhiteKernel(noise_level=estimNoise)
        kernel = rbfKernel + whiteKernel
        # kernel = 1.0 * RBF(length_scale=lengthScale)
        print('\nbiomk %d lengthScale %f noise %f alpha %f'
              % (b, lengthScale, estimNoise, estimAlpha))

        normalizeYflag = False
        # the noise is handled through alpha, so only the RBF part is passed as kernel
        gp = GaussianProcessRegressor(kernel=rbfKernel, alpha=estimAlpha,
                                      optimizer='fmin_l_bfgs_b',
                                      n_restarts_optimizer=100,
                                      normalize_y=normalizeYflag)
        assert not any(np.isnan(X))
        assert not any(np.isnan(Y))

        # Fit to data using Maximum Likelihood Estimation of the parameters
        gp.fit(X, Y)
        print("optimised kernel", gp.kernel_)

        # Make the prediction on the meshed x-axis (ask for the Cov matrix as well)
        x_pred[:, b] = np.linspace(minX[b], maxX[b], nrPointsToEval)
        assert not any(np.isnan(x_pred[:, b]))
        dXdT_predCurr, cov_matrix = gp.predict(x_pred[:, b].reshape(-1, 1), return_cov=True)

        # make sure dXdT is not too low, otherwise truncate the [minX, maxX] interval
        dXdTthresh = 1e-10
        tooLowMask = np.abs(np.ravel(dXdT_predCurr)) < dXdTthresh
        if np.sum(tooLowMask) > nrPointsToEval / 10:
            print("Warning: dXdT is too low, will restrict the [minX, maxX] interval")
            goodIndicesMask = np.logical_not(tooLowMask)
            minX[b] = min(x_pred[goodIndicesMask, b])
            maxX[b] = max(x_pred[goodIndicesMask, b])
            x_pred[:, b] = np.linspace(minX[b], maxX[b], nrPointsToEval)
            dXdT_predCurr, cov_matrix = gp.predict(x_pred[:, b].reshape(-1, 1), return_cov=True)

        MSE = np.diagonal(cov_matrix)
        dXdT_pred[:, b] = np.ravel(dXdT_predCurr)
        sigma_pred[:, b] = np.ravel(np.sqrt(MSE))
        samples = gp.sample_y(x_pred[:, b].reshape(-1, 1), n_samples=nrSamples, random_state=0)
        posteriorSamples[:, :, b] = np.squeeze(samples).T

        # undo the standardisation: shift/scale the Xs back, scale the Ys back
        x_pred[:, b] = x_pred[:, b] * patXStd[b] + patXMean[b]
        dXdT_pred[:, b] = dXdT_pred[:, b] * patdXdTStd[b]
        sigma_pred[:, b] = sigma_pred[:, b] * patdXdTStd[b]
        posteriorSamples[:, :, b] = posteriorSamples[:, :, b] * patdXdTStd[b]

        gpParams = gp.get_params(deep=True)
        gpList.append(gp)

    return x_pred, dXdT_pred, sigma_pred, gpList, posteriorSamples
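# Minimal usage sketch for fitGaussianProc, assuming the module already imports
# numpy as np plus ConstantKernel, RBF, WhiteKernel and GaussianProcessRegressor,
# and defines the CTL constant the function asserts on. Inputs are object arrays
# holding one 1-D array per biomarker; the data below is synthetic.
import numpy as np

CTL = 1  # assumed module-level constant checked by the function
nrBiomk, nrSubj = 2, 40
rng = np.random.RandomState(0)
patAvgXdata = np.empty(nrBiomk, dtype=object)  # biomarker values x
patDXdTdata = np.empty(nrBiomk, dtype=object)  # rates of change dx/dt
for b in range(nrBiomk):
    xb = rng.rand(nrSubj)
    patAvgXdata[b] = xb
    patDXdTdata[b] = 0.5 * xb + 0.05 * rng.randn(nrSubj)

params = {'lengthScaleFactors': [1.0] * nrBiomk}
# slow: a 5000-point covariance matrix and 100 optimizer restarts per biomarker
x_pred, dXdT_pred, sigma_pred, gpList, posteriorSamples = \
    fitGaussianProc(patDXdTdata, patAvgXdata, params)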
class GaussianProcessRegressionModel(Model):
    """
    Gaussian Process Regression Model.
    """

    def __init__(self, describer, kernel_category='RBF', restarts=10, **kwargs):
        """
        Args:
            describer (Describer): Describer to convert input objects to descriptors.
            kernel_category (str): Name of kernel from sklearn.gaussian_process.kernels.
                Defaults to 'RBF', i.e., squared exponential.
            restarts (int): The number of restarts of the optimizer for finding the
                kernel's parameters which maximize the log-marginal likelihood.
            kwargs: kwargs to be passed to the kernel object, e.g. length_scale,
                length_scale_bounds.
        """
        self.describer = describer
        kernel = getattr(kernels, kernel_category)(**kwargs)
        self.model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=restarts)
        self._xtrain = None
        self._xtest = None

    def fit(self, inputs, outputs, override=False):
        """
        Args:
            inputs (list): List of input training objects.
            outputs (list): List/array of output values (supervisory signals).
            override (bool): Whether to calculate the feature vectors from the given
                inputs. Defaults to False. Set to True to retrain the model with a
                different set of training inputs.
        """
        if self._xtrain is None or override:
            xtrain = self.describer.describe_all(inputs)
        else:
            warnings.warn("Feature vectors retrieved from cache "
                          "and input training objects ignored. "
                          "To override the old cache with feature vectors "
                          "of new training objects, set override=True.")
            xtrain = self._xtrain
        self.model.fit(xtrain, outputs)
        self._xtrain = xtrain

    def predict(self, inputs, override=False, **kwargs):
        """
        Args:
            inputs (list): List of input testing objects.
            override (bool): Whether to calculate the feature vectors from the given
                inputs. Defaults to False. Set to True to test the model with a
                different set of testing inputs.
            kwargs: kwargs to be passed to the predict method, e.g. return_std,
                return_cov.

        Returns:
            Predicted output array from inputs.
        """
        if self._xtest is None or override:
            xtest = self.describer.describe_all(inputs)
        else:
            warnings.warn("Feature vectors retrieved from cache "
                          "and input testing objects ignored. "
                          "To override the old cache with feature vectors "
                          "of new testing objects, set override=True.")
            xtest = self._xtest
        self._xtest = xtest
        return self.model.predict(xtest, **kwargs)

    @property
    def params(self):
        return self.model.get_params()

    def save(self, model_fname):
        joblib.dump(self.model, '%s.pkl' % model_fname)

    def load(self, model_fname):
        self.model = joblib.load(model_fname)
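# Minimal usage sketch for GaussianProcessRegressionModel. `IdentityDescriber`
# is a hypothetical stand-in for a real Describer: its describe_all simply
# returns the numeric inputs as a 2-D array. Assumes the Model base class needs
# no extra constructor arguments.
import numpy as np

class IdentityDescriber:
    def describe_all(self, inputs):
        return np.asarray(inputs)

gpr_model = GaussianProcessRegressionModel(describer=IdentityDescriber(),
                                           kernel_category='RBF', restarts=5,
                                           length_scale=1.0)
X_objs = np.random.rand(30, 3).tolist()
y_vals = np.random.rand(30)
gpr_model.fit(X_objs, y_vals)                 # descriptors computed and cached
mean, std = gpr_model.predict(X_objs, return_std=True)
gpr_model.fit(X_objs, y_vals)                 # warns and reuses the cached descriptors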
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

x = np.linspace(0, 1, 100)
x1 = np.linspace(-5, 0, 100)
x2 = np.linspace(0, 5, 100)

# Gaussian-shaped covariance built by hand from the pairwise distances of the grid points
K = norm.pdf(10 * np.abs(np.subtract(*np.meshgrid(x, x))))
plt.figure()
for _ in range(10):
    plt.plot(x, multivariate_normal.rvs(np.zeros(100), K))

# X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
kernel = RBF(0.1)  # DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel, random_state=0, optimizer=None)  # .fit(X, y)
plt.figure()
for i in range(10):
    y1 = gpr.sample_y(x[:, None], random_state=i).squeeze()
    # gpr.fit(x1[:, None], y1)
    plt.plot(x, y1)
    # plt.plot(x2, gpr.sample_y(x2[:, None], random_state=i).squeeze())
plt.show()
# print(gpr.score(X, y))
# print(gpr.predict(X[:2, :], return_std=True))
print(gpr.get_params())
class SklearnModel(BaseEstimator, TransformerMixin):
    """
    Class to wrap any sklearn estimator and provide some new dataframe functionality.

    Args:
        model: (str), string denoting the name of an sklearn estimator object, e.g. KernelRidge

        kwargs: keyword pairs of values to include for model, e.g. for KernelRidge one can
            specify kernel, alpha, gamma values

    Methods:
        fit: method that fits the model parameters to the provided training data
            Args:
                X: (pd.DataFrame), dataframe of X features

                y: (pd.Series), series of y target data

            Returns:
                fitted model

        predict: method that evaluates the model on new data to give predictions
            Args:
                X: (pd.DataFrame), dataframe of X features

                as_frame: (bool), whether to return data as a pandas dataframe (else numpy array)

            Returns:
                series or array of predicted values

        help: method to output key information on class use, e.g. methods and parameters
            Args:
                None

            Returns:
                None, but outputs help to screen
    """

    def __init__(self, model, **kwargs):
        if model == 'XGBoostRegressor':
            self.model = xgboost.XGBRegressor(**kwargs)
        elif model == 'GaussianProcessRegressor':
            kernel = kwargs['kernel']
            kernel = _make_gpr_kernel(kernel_string=kernel)
            del kwargs['kernel']
            self.model = GaussianProcessRegressor(kernel=kernel, **kwargs)
        else:
            self.model = dict(sklearn.utils.all_estimators())[model](**kwargs)

    def fit(self, X, y):
        return self.model.fit(X, y)

    def predict(self, X, as_frame=True):
        if as_frame:
            return pd.DataFrame(self.model.predict(X), columns=['y_pred']).squeeze()
        return self.model.predict(X).ravel()

    def get_params(self, deep=True):
        return self.model.get_params(deep=deep)

    def help(self):
        print('Documentation for', self.model)
        pprint(dict(inspect.getmembers(self.model))['__doc__'])
        print('\n')
        print('Class methods for', self.model)
        pprint(dict(inspect.getmembers(self.model, predicate=inspect.ismethod)))
        print('\n')
        print('Class attributes for', self.model)
        pprint(self.model.__dict__)
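# Minimal usage sketch for SklearnModel. 'KernelRidge' is resolved through
# sklearn.utils.all_estimators(), so this path needs no project-specific
# helpers such as _make_gpr_kernel.
import numpy as np
import pandas as pd

X_df = pd.DataFrame(np.random.rand(20, 3), columns=['f1', 'f2', 'f3'])
y_sr = pd.Series(np.random.rand(20))

wrapped = SklearnModel('KernelRidge', alpha=1.0, kernel='rbf')
wrapped.fit(X_df, y_sr)
preds = wrapped.predict(X_df)  # pandas Series named 'y_pred'
print(wrapped.get_params())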
     [4.29724834, -13.63014662, 4.96232973],
     [4.59071543, -11.92423152, 4.81544906],
     [4.2949693, -12.87675871, 5.16477966],
     [4.10585292, -13.50090225, 4.84599718],
     [6.91792365, -11.29063318, -5.07800206],
     [6.78145595, -11.1925924, -7.11770544],
     [6.70614104, -12.32381859, -14.9805829],
     [7.81653159, -12.01413346, -8.49294406],
     [6.17192181, -12.4810872, -7.79349506],
     [4.02251458, -10.1751462, 3.58618972],
     [6.08898904, -11.6674992, 0.28110092],
     [6.20738109, -12.08157436, 0.05359656],
     [5.61603719, -12.39538706, 1.1257978],
     [5.84118061, -12.58242897, 1.78389973]]

y = [[0.957822501659],
     [0.958218336105],
     [0.95780223608],
     [0.960562646389],
     [0.964764118195],
     [0.999685406685, 0.999685406685, 0.999685406685, 0.999675273895, 0.999685406685],
     [0.985243976116, 0.968725013042, 0.958451747894, 0.954782373101, 0.958218336105]]

gp.fit(X, y)

params = gp.get_params()
# kernel objects are not JSON-serializable, so fall back to their string form
with open('gpParams.json', 'w') as fp:
    json.dump(params, fp, default=str)
def gpr_heightmap(init_values, x, y, kernel=None, alpha=1e-10, min_height=0, max_height=255,
                  dtype=int):
    r"""
    Generate a heightmap using Gaussian process regression.

    The advantage of this method over others for generating terrains lies in the
    capacity to add prior knowledge through the kernel and the given initial values.
    For instance, using an RBF kernel means that we want a smooth terrain instead of
    a bumpy one. Furthermore, it allows to generate heightmaps which are not
    necessarily square; i.e. they can be rectangular.

    Warnings: this is pretty difficult to exploit if the given data is not consistent.
    See `heigthmap_rbf` for a better way to generate a heightmap.

    Args:
        init_values (np.array[M,3]): list of `M` 3D points which correspond to initial
            values that are used to fit the gaussian process.
        x (np.array[N], np.array[N,O]): if a 1D array, the meshgrid will be computed.
            Otherwise, the resulting 2D array from the meshgrid is expected. This is
            used to predict the heightmap at the given points.
        y (np.array[O], np.array[N,O]): if a 1D array, the meshgrid will be computed.
            Otherwise, the resulting 2D array from the meshgrid is expected. This is
            used to predict the heightmap at the given points.
        kernel (None, sklearn.gaussian_process.kernels.Kernel): "The kernel specifying
            the covariance function of the GP. If None is passed, the kernel
            '1.0 * RBF(1.0)' is used as default. Note that the kernel's hyperparameters
            are optimized during fitting" [2]
        alpha (float, array_like): "Value added to the diagonal of the kernel matrix
            during fitting. Larger values correspond to increased noise level in the
            observations. This can also prevent a potential numerical issue during
            fitting, by ensuring that the calculated values form a positive definite
            matrix. If an array is passed, it must have the same number of entries as
            the data used for fitting and is used as datapoint-dependent noise level.
            Note that this is equivalent to adding a WhiteKernel with c=alpha.
            Allowing to specify the noise level directly as a parameter is mainly for
            convenience and for consistency with Ridge." [2]
        min_height (int, float): lower bound; each value in the heightmap will be
            higher than or equal to this bound.
        max_height (int, float): upper bound; each value in the heightmap will be
            lower than or equal to this bound.
        dtype (type): type (e.g. int or float) of the returned heightmap array.

    Returns:
        np.array[N,O]: resulting 2D heightmap

    Examples:
        >>> # generate heightmap using gaussian process regression
        >>> x = np.array(range(256))
        >>> y = np.array(range(256))
        >>> N_init = 20
        >>> x_init = np.random.randint(low=x.min(), high=x.max(), size=N_init)
        >>> y_init = np.random.randint(low=y.min(), high=y.max(), size=N_init)
        >>> z_init = np.random.randint(low=0, high=20, size=N_init)
        >>> init_values = np.vstack((x_init, y_init, z_init)).T  # shape: Nx3
        >>> heightmap = gpr_heightmap(init_values, x, y)

    References:
        - [1] "Gaussian Processes for Machine Learning", Rasmussen and Williams, 2006
        - [2] Sklearn: https://scikit-learn.org/stable/modules/gaussian_process.html
    """
    # check given x and y
    if len(x.shape) == 1 and len(y.shape) == 1:
        x, y = np.meshgrid(x, y)
    if x.shape != y.shape:
        raise ValueError("Expecting x and y to have the same shape, which should be "
                         "the case if it is a meshgrid")

    # check initial values
    if not isinstance(init_values, np.ndarray):
        raise TypeError("Expecting init_values to be a numpy array")
    if init_values.shape[1] != 3:
        raise ValueError("Expecting a numpy array of 3D points for init_values")

    # compute the minimum distance between the initial points
    N = len(init_values)
    min_dist = np.inf
    for i in range(N):
        for j in range(i + 1, N):
            dist = np.linalg.norm(init_values[i, :2] - init_values[j, :2])
            if dist < min_dist:
                min_dist = dist
    print("Min dist: {}".format(min_dist))

    # create the gaussian process and fit it on the given initial values
    if kernel is None:  # default to an RBF whose length scale reflects the point spacing
        kernel = RBF(length_scale=np.sqrt(min_dist))
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=alpha, normalize_y=True)
    gpr.fit(init_values[:, :2], init_values[:, 2])

    # predict the heightmap using GPR
    X = np.dstack((x, y)).reshape(-1, 2)
    heightmap = gpr.predict(X)
    heightmap = heightmap.reshape(x.shape)
    print("Params: {}".format(gpr.get_params()))

    # clip the heightmap values to the given bounds (in-place) and cast to the requested type
    np.clip(heightmap, min_height, max_height, heightmap)
    heightmap = heightmap.astype(dtype)
    return heightmap
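# Minimal usage sketch: visualize a heightmap produced by gpr_heightmap,
# following the docstring example above; matplotlib and continuous random
# coordinates (to avoid duplicate points, i.e. a zero min_dist) are assumptions.
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(128)
y = np.arange(128)
N_init = 20
init_values = np.vstack((np.random.uniform(0, 127, N_init),
                         np.random.uniform(0, 127, N_init),
                         np.random.uniform(0, 20, N_init))).T  # shape: Nx3

hm = gpr_heightmap(init_values, x, y, max_height=20)
plt.imshow(hm, origin='lower', cmap='terrain')
plt.colorbar(label='height')
plt.show()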
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
import sklearn.gaussian_process.kernels as kl

a1 = np.random.normal(1, 1.5, 50).reshape(50, 1)
# a2 = np.random.normal(10.6, 8.6, 50).reshape(50, 1)
# b = a1 - np.random.random(5).reshape(5, 1)
b = np.random.laplace(2, 1.1, 50).reshape(50, 1)
# plt.scatter(a1, b, marker='o', color='r', label='3', s=15)
# plt.show()

gaussian = GaussianProcessRegressor(kernel=kl.RBF(5.0, length_scale_bounds='fixed'))
fiting = gaussian.fit(a1, b)
gaussian.get_params(True)

# c = np.linspace(a1.min() - 0.1, a1.max() + 0.1, 50)
c = np.linspace(a1.min(), a1.max(), 20)
mean, std = gaussian.predict(c.reshape(20, 1), return_std=True)

plt.scatter(a1, b, marker='o', color='r', label='3', s=15)
plt.plot(c, mean)
plt.plot(c, mean + (std * 200).reshape(20, 1))
plt.plot(c, mean - (std * 200).reshape(20, 1))
plt.show()
# dy1 += noise
# print('test y', y)
# print('test dy', dyf)
# print('test x', X)

# Instantiate a Gaussian Process model
gp = GaussianProcessRegressor(
    kernel=kernel,
    alpha=(dyf / y) ** 2,
    n_restarts_optimizer=500)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)
print('Parameters:', gp.get_params(deep=True))
print('Score:', gp.score(X, y))

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(x, return_std=True)
# print('Sigma:', sigma)
likel = y_pred - sigma

m = max(y_pred)
mp = [i for i, j in enumerate(y_pred) if j == m]
Mag400p = -((np.log10(y_pred[mp] * fm) - 31.4) / 0.4)
Mag400p_err = (-(np.log10(y_pred[mp] * fm) - 31.4) / 0.4) - (-(np.log10(likel[mp] * fm) - 31.4) / 0.4)
print('Peak item, Mag, Mag-err, Peak:', mp, Mag400p, Mag400p_err, x[mp])
# y_sample_yo = gp.sample_y(Xtr_1, 1)
try:
    gp.fit(Xtr, Ytr)
    print("marginal likelihood:", gp.log_marginal_likelihood())
    y_pred, y_sigma = gp.predict(Xtst, return_std=True)
    print(y_pred.shape)

    result_time = [g + 1 for g in range(D, r)]
    s = ("time interval between " + str(result_time[0]) + " and " + str(result_time[-1]) +
         " minutes\n window is " + str(D))
    plt.xlabel(s)
    ylab = labels[z]
    plt.ylabel(ylab)
    o = ("Using " + str(gp.get_params()['kernel']) + " kernel\nwith " + str(total_samp) +
         " averaged training samples\nand " + str(r) + " averaged test samples")
    plt.title(o)

    # plotting data
    # plt.plot(result_time, y_sample_yo, "c-", label="kernel sample")
    plt.plot(result_time, Ycomp, "y-", label="training")
    plt.plot(result_time, y_pred.T[0], "g-", label="predicted")
    plt.plot(result_time, Ytst.T[0], "m-", label="real")
    plt.fill(np.concatenate([result_time, result_time[::-1]]),
             np.concatenate([y_pred - 1.96 * y_sigma,
                             (y_pred + 1.96 * y_sigma)[::-1]]),
             alpha=.5, fc='b', ec='none')