Example #1
    def fit(self, X, Y, Sigma):
        """
        @brief the method where the fitting magic happens

        Parameters
        ----------
        X : double array_like
            An array with shape (n_samples, ) or (n_samples, n_dim) with the 
            input at which observations were made.

        Y : double array_like
            An array containing the observations f(X)
        
        Sigma : double array_like
            An array of shape (n_samples, ) containing the measurement errors 
            of Y, or a (n_samples, n_samples) covariance matrix of the data.

        Returns
        -------
        gp : self
            A fitted Gaussian Process model object awaiting data to perform
            predictions.
        """

        
        # set the observational data
        self._set_data(X, Y, Sigma)
        
        # calculate matrix of distances D between input X samples
        D = tools.l1_distances(self.X)
        self.D = D

        # do the fitting
        # Maximum Likelihood Estimation of the parameters
        if self.verbose:
            print("Performing Maximum Likelihood Estimation of the "
                + "autocorrelation parameters...")
                
        self.theta, self.likelihood_function_value, par = self.optimize()
        if np.isinf(self.likelihood_function_value):
            raise Exception("Bad parameter region. Try increasing upper bound")
        
        self.alpha = par['alpha']
        self.L     = par['L']

        return self
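The heavy lifting above is delegated to optimize(), which maximizes the log marginal likelihood over the autocorrelation parameters and returns the Cholesky factor L of the training covariance together with alpha = K^{-1} Y, both cached for later predictions. Since optimize(), _set_data() and the tools module are not shown here, the following is a minimal self-contained sketch of that step, assuming a squared-exponential kernel; the kernel, variable names and optimizer settings are illustrative, not this module's API.

import numpy as np
from scipy import linalg, optimize

def neg_log_marginal_likelihood(log_theta, D, Y, sigma):
    # assumed squared-exponential kernel on the L1 distance matrix D
    amp, scale = np.exp(log_theta)
    K = amp**2 * np.exp(-0.5 * (D / scale)**2) + np.diag(sigma**2)
    try:
        L = linalg.cholesky(K, lower=True)       # K = L L^T
    except np.linalg.LinAlgError:
        return np.inf                            # non-positive-definite region
    alpha = linalg.cho_solve((L, True), Y)       # alpha = K^{-1} Y
    return (0.5 * Y @ alpha + np.sum(np.log(np.diag(L)))
            + 0.5 * len(Y) * np.log(2 * np.pi))

rng = np.random.default_rng(0)
X = np.linspace(0, 10, 40)
Y = np.sin(X) + 0.1 * rng.standard_normal(X.size)
sigma = 0.1 * np.ones_like(X)
D = np.abs(X[:, None] - X[None, :])              # pairwise L1 distances

res = optimize.minimize(neg_log_marginal_likelihood, x0=[0.0, 0.0],
                        args=(D, Y, sigma))
theta = np.exp(res.x)                            # ML amplitude and length scale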
Example #2
    def predict(self, Xstar):
        """
        @brief evaluate the Gaussian Process model at Xstar.

        Parameters
        ----------
        Xstar : array_like
            An array with shape (n_eval, n_features) giving the point(s) at
            which the prediction(s) should be made.

        Returns
        -------
        y : array_like
            An array with shape (n_eval, ) with the predicted value, f(x)

        sigma : array_like
            An array with shape (n_eval, ) with the standard deviation at x.
        """
        
        n = len(Xstar)
        # Check input shapes
        if np.shape(Xstar) == (n,):
            Xstar = np.reshape(Xstar, (n, 1))
            
        n_eval, n_dim_Xstar = Xstar.shape
        n_samples, n_dim_X = self.X.shape

        # Run input checks
        if n_dim_Xstar != n_dim_X:
            raise ValueError(("The number of dimensions in Xstar "
                              "(Xstar.shape[1] = %d) "
                              "should match the sample size used for fit() "
                              "which is %d.") % (n_dim_Xstar, n_dim_X))


        fmean = np.zeros(n)
        fstd = np.zeros(n)
        
        for i in range(n):
            
            thisXstar = Xstar[i, :]
            nstar = thisXstar.shape[0]
            
            # Get pairwise componentwise L1-distances to the input training set
            dx = tools.l1_distances(tools.array2d(thisXstar), self.X)
        
            # the covariance vector between these distances and training set
            kstar = self.covf.covfunc(self.theta, dx).T
            kstar = kstar.flatten()
            
            # the predictive mean
            mean = np.dot(kstar.T, self.alpha)
        
            # calculate predictive standard deviation
            v = linalg.solve(self.L, kstar)
        
            # now compute cov(Xstar, Xstar)
            dxx = tools.l1_distances(tools.array2d(thisXstar))
            covstar = self.covf.covfunc(self.theta, dxx).T
            covstar = covstar.flatten()[0]
        
            var = covstar - np.dot(v.T, v)
            
            # add the mean function evaluated at this prediction point, if given
            if self.mu is not None:
                mean += self.mu(thisXstar, *self.muargs)

            # guard against small negative variances from round-off
            if var < 0.:
                var = 0.
            
            fmean[i] = mean
            fstd[i] = np.sqrt(var)
            
        return fmean, fstd
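The loop above is the standard GP prediction: with the Cholesky factor L and alpha = K^{-1} Y cached by fit(), the mean at a point x* is k*^T alpha and the variance is k(x*, x*) - v^T v with v = L^{-1} k*. Below is a self-contained sketch of those two formulas, again assuming a squared-exponential kernel in place of the module's covf.covfunc and tools helpers.

import numpy as np
from scipy import linalg

def k_sqexp(d, amp=1.0, scale=1.0):
    # assumed squared-exponential kernel, for illustration only
    return amp**2 * np.exp(-0.5 * (d / scale)**2)

rng = np.random.default_rng(1)
X = np.linspace(0, 10, 50)
Y = np.sin(X) + 0.1 * rng.standard_normal(X.size)
noise = 0.1

# "fit": factor the training covariance once
K = k_sqexp(np.abs(X[:, None] - X[None, :])) + noise**2 * np.eye(X.size)
L = linalg.cholesky(K, lower=True)
alpha = linalg.cho_solve((L, True), Y)            # alpha = K^{-1} Y

# "predict": evaluate the mean and standard deviation point by point
Xstar = np.linspace(0, 10, 200)
fmean = np.empty_like(Xstar)
fstd = np.empty_like(Xstar)
for i, xs in enumerate(Xstar):
    kstar = k_sqexp(np.abs(xs - X))               # cov(x*, X)
    v = linalg.solve_triangular(L, kstar, lower=True)
    fmean[i] = kstar @ alpha                      # k*^T K^{-1} Y
    fstd[i] = np.sqrt(max(k_sqexp(0.0) - v @ v, 0.0))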
Example #3
    def fit(self, X, Y, Sigma, batch_size=200, overlap=0.2):
        """
        @brief the method where the fitting magic happens

        Parameters
        ----------
        X : double array_like
            An array with shape (n_samples, ) or (n_samples, n_dim) with the 
            input at which observations were made.

        Y : double array_like
            An array containing the observations f(X)
        
        Sigma : double array_like
            An array of shape (n_samples, ) containing the measurement errors 
            of Y, or a (n_samples, n_samples) covariance matrix of the data.
            
        batch_size : int, optional
            the number of points along the x-axis to fit in each batch
            
        overlap : float, optional
            the fraction of batch_size by which adjacent batches are overlapped

        Returns
        -------
        gp : self
            A fitted Gaussian Process model object awaiting data to perform
            predictions.
        """
        nprocs = comm.Get_size()
        myrank = comm.Get_rank()
        
        # number of batches (integer division so it can be used as a range bound)
        nbatches = max(1, len(X) // batch_size)
        self.nbatches = nbatches
        
        n_samples = len(X)
        self.fits = []
        self.xbounds = []
        
        # do given batch of the fit based on rank
        for k in range(myrank, nbatches, nprocs):
            
            batch_from = k * batch_size 
            batch_to = min([(k + 1) * batch_size + 1, n_samples + 1])
            if k == nbatches-1: batch_to = len(X)
        
            # keep track of the x bounds
            xmin = np.amin(X[batch_from:batch_to])
            xmax = np.amax(X[batch_from:batch_to])
            self.xbounds.append((xmin, xmax))
            
            # pad the batch with overlapping points on either side
            batch_to += int(overlap * batch_size)
            batch_from -= int(overlap * batch_size)
            if batch_from < 0:
                batch_from = 0
        
            X_cut    = X[batch_from:batch_to]
            Y_cut    = Y[batch_from:batch_to]
            Sigma_cut = Sigma[batch_from:batch_to]
            
            # set the observational data
            self._set_data(X_cut, Y_cut, Sigma_cut)
        
            # calculate matrix of distances D between input X samples
            D = tools.l1_distances(self.X)
            self.D = D

            # do the fitting
            # Maximum Likelihood Estimation of the parameters
            if self.verbose:
                print("Performing Maximum Likelihood Estimation of the "
                    + "autocorrelation parameters...")
                
            self.theta, self.likelihood_function_value, par = self.optimize()
            if np.isinf(self.likelihood_function_value):
                raise Exception("Bad parameter region. Try increasing upper bound")
        
            self.alpha = par['alpha']
            self.L     = par['L']
            
            self.fits.append(copy.deepcopy(self))
        
        return self
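The MPI variant splits the training data into nbatches contiguous batches, assigns them round-robin across ranks via range(myrank, nbatches, nprocs), and pads each batch by overlap * batch_size points on either side before fitting it independently. The sketch below reproduces just that bookkeeping without requiring mpi4py; the loop over myrank stands in for what each process would do, and the exact boundary conventions are a simplification rather than a copy of the method.

n_samples, batch_size, overlap, nprocs = 1000, 200, 0.2, 3
nbatches = max(1, n_samples // batch_size)
pad = int(overlap * batch_size)

for myrank in range(nprocs):
    for k in range(myrank, nbatches, nprocs):
        batch_from = k * batch_size
        batch_to = n_samples if k == nbatches - 1 else (k + 1) * batch_size
        lo = max(batch_from - pad, 0)              # pad on the left
        hi = min(batch_to + pad, n_samples)        # pad on the right
        print("rank %d: batch %d fits points [%d, %d) (core region [%d, %d))"
              % (myrank, k, lo, hi, batch_from, batch_to))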
Example #4
    def predict(self, Xstar):
        """
        @brief evaluate the Gaussian Process model at Xstar.

        Parameters
        ----------
        Xstar : array_like
            An array with shape (n_eval, n_features) giving the point(s) at
            which the prediction(s) should be made.

        Returns
        -------
        y : array_like
            An array with shape (n_eval, ) with the Best Linear Unbiased
            Prediction at x.

        sigma : array_like
            An array with shape (n_eval, ) with the standard deviation at x.
        """
        nprocs = comm.Get_size()
        myrank = comm.Get_rank()
    
        data_pred = [] # the predicted data
        
        # do given batch of the fit based on rank
        cnt = 0
        for k in range(myrank, self.nbatches, nprocs):
            
            # trim Xstar
            inds = np.where((Xstar >= self.xbounds[cnt][0]) & (Xstar <= self.xbounds[cnt][1]))
            Xstar_cut = Xstar[inds]
            
            # check for Xstar outside fitting X range
            if k == 0:
                inds = np.where(Xstar < self.xbounds[cnt][0])
                Xstar_cut = np.append(Xstar[inds], Xstar_cut)
            if k == self.nbatches-1:
                inds = np.where(Xstar > self.xbounds[cnt][1])
                Xstar_cut = np.append(Xstar_cut, Xstar[inds])
            
            n = len(Xstar_cut)
            
            # Check input shapes
            if np.shape(Xstar_cut) == (n,):
                Xstar_cut = np.reshape(Xstar_cut, (n, 1))
            
            n_eval, n_dim_Xstar = Xstar_cut.shape
            n_samples, n_dim_X = self.X.shape

            # Run input checks
            if n_dim_Xstar != n_dim_X:
                raise ValueError(("The number of dimensions in Xstar "
                                  "(Xstar.shape[1] = %d) "
                                  "should match the sample size used for fit() "
                                  "which is %d.") % (n_dim_Xstar, n_dim_X))


            # get the right fit
            fit = self.fits[cnt]
            
            fmean = np.zeros(n)
            fstd = np.zeros(n)
        
            for i in range(n):
            
                thisXstar = Xstar_cut[i, :]
                nstar = thisXstar.shape[0]
            
                # Get pairwise componentwise L1-distances to the input training set
                dx = tools.l1_distances(tools.array2d(thisXstar), fit.X)
        
                # the covariance vector between these distances and training set
                kstar = self.covf.covfunc(fit.theta, dx).T
                kstar = kstar.flatten()
            
                # the predictive mean
                mean = np.dot(kstar.T, fit.alpha)
        
                # calculate predictive standard deviation
                v = linalg.solve(fit.L, kstar)
        
                # now compute cov(Xstar, Xstar)
                dxx = tools.l1_distances(tools.array2d(thisXstar))
                covstar = self.covf.covfunc(fit.theta, dxx).T
                covstar = covstar.flatten()[0]
        
                var = covstar - np.dot(v.T, v)
            
                # add the mean function evaluated at this prediction point, if given
                if self.mu is not None:
                    mean += self.mu(thisXstar, *self.muargs)

                # guard against small negative variances from round-off
                if var < 0.:
                    var = 0.
            
                fmean[i] = mean
                fstd[i] = np.sqrt(var)
            
            cnt += 1
           
            # save the predictions
            data_pred.append((k, Xstar_cut, fmean, fstd))
         
        # gather the reconstructed data to the master   
        data_pred = comm.gather(data_pred, root=0) 
        
        # the master combines and returns
        if myrank == 0: 
            x_rec, y_rec, yerr_rec = self._combine_data(data_pred)
            
            return y_rec, yerr_rec
        else:
            return None, None
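On the master, comm.gather delivers one list of (k, Xstar_cut, fmean, fstd) tuples per rank, and _combine_data (not shown in these snippets) has to stitch them back together. The sketch below is an assumption about what that recombination could look like, not the module's actual implementation: flatten the gathered per-rank lists, order the pieces by batch index k, and concatenate.

import numpy as np

def combine_data(gathered):
    # flatten the per-rank lists and sort the pieces by batch index k
    pieces = sorted((p for rank_list in gathered for p in rank_list),
                    key=lambda p: p[0])
    x = np.concatenate([p[1] for p in pieces])
    y = np.concatenate([p[2] for p in pieces])
    yerr = np.concatenate([p[3] for p in pieces])
    return x, y, yerr

# toy "gathered" structure: two ranks, one batch each
gathered = [
    [(0, np.array([0.0, 0.5]), np.array([1.0, 1.1]), np.array([0.1, 0.1]))],
    [(1, np.array([1.0, 1.5]), np.array([1.2, 1.3]), np.array([0.1, 0.1]))],
]
x_rec, y_rec, yerr_rec = combine_data(gathered)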