Example #1
def objective(theta):
   logger.debug('Current hyperparameters : ' + ' '.join('%0.4e' % i for i in theta))
   test_params = np.copy(params)
   test_params[free] = theta 
   test_network_params = test_params[:n]
   test_station_params = test_params[n:]
   net_gp = composite(network_model,test_network_params,gpnetwork.CONSTRUCTORS)
   sta_gp = composite(station_model,test_station_params,gpstation.CONSTRUCTORS)
   # station process
   sta_sigma,sta_p = station_sigma_and_p(sta_gp,t,mask)
   # add data noise to the diagonals of sta_sigma. Both matrices are
   # sparse so this is efficient
   obs_sigma = _as_covariance(sd)
   sta_sigma = as_sparse_or_array(sta_sigma + obs_sigma)
   # network process
   net_sigma = net_gp._covariance(z,z,diff,diff)
   net_p = net_gp._basis(z,diff)
   # combine station gp with the network gp
   mu = np.zeros(z.shape[0])
   sigma = as_sparse_or_array(sta_sigma + net_sigma)
   p = np.hstack((sta_p,net_p))
   del sta_sigma,net_sigma,obs_sigma,sta_p,net_p
   try:
     out = likelihood(d,mu,sigma,p=p)
   except np.linalg.LinAlgError as err:
     logger.warning(
       'An error was raised while computing the log '
       'likelihood:\n\n%s\n' % repr(err))
     logger.warning('Returning -INF for the log likelihood')   
     out = -np.inf
     
   logger.debug('Log likelihood : %.8e' % out)
   return out  
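
A note on usage: the closure above gets its `params`, `free`, model names, and data from an enclosing scope that is not shown, and the value it returns is a log likelihood to be maximized. A minimal sketch of that pattern, using a hypothetical stand-in objective and scipy:

import numpy as np
from scipy.optimize import minimize

def toy_objective(theta):
    # stand-in for the log likelihood; peaks at theta = (1.0, 2.0)
    return -np.sum((theta - np.array([1.0, 2.0]))**2)

# maximize the log likelihood by minimizing its negation
result = minimize(lambda theta: -toy_objective(theta),
                  x0=np.zeros(2), method='Nelder-Mead')
print(result.x)  # approximately [1.0, 2.0]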
Example #2
    def objective(theta):
        logger.debug('Current hyperparameters : ' + ' '.join('%0.4e' % i
                                                             for i in theta))
        test_params = np.copy(params)
        test_params[free] = theta
        test_network_params = test_params[:n]
        test_station_params = test_params[n:]
        net_gp = composite(network_model, test_network_params,
                           gpnetwork.CONSTRUCTORS)
        sta_gp = composite(station_model, test_station_params,
                           gpstation.CONSTRUCTORS)
        # station process
        sta_sigma, sta_p = station_sigma_and_p(sta_gp, t, mask)
        # add data noise to the diagonals of sta_sigma. Both matrices are
        # sparse so this is efficient
        obs_sigma = _as_covariance(sd)
        sta_sigma = as_sparse_or_array(sta_sigma + obs_sigma)
        # network process
        net_sigma = net_gp._covariance(z, z, diff, diff)
        net_p = net_gp._basis(z, diff)
        # combine station gp with the network gp
        mu = np.zeros(z.shape[0])
        sigma = as_sparse_or_array(sta_sigma + net_sigma)
        p = np.hstack((sta_p, net_p))
        del sta_sigma, net_sigma, obs_sigma, sta_p, net_p
        try:
            out = likelihood(d, mu, sigma, p=p)
        except np.linalg.LinAlgError as err:
            logger.warning('An error was raised while computing the log '
                           'likelihood:\n\n%s\n' % repr(err))
            logger.warning('Returning -INF for the log likelihood')
            out = -np.inf

        logger.debug('Log likelihood : %.8e' % out)
        return out
Example #3
    def log_likelihood(self, y, d, dcov=None, dvecs=None):
        '''
        Returns the log likelihood of drawing the observations `d` from the
        Gaussian process. The observations may have noise, which is described
        by `dcov` and `dvecs`. If the Gaussian process contains any basis
        functions or if `dvecs` is specified, the restricted log likelihood is
        returned.

        Parameters
        ----------
        y : (N, D) array
            Observation points.

        d : (N,) array
            Observed values at `y`.

        dcov : (N, N) array or sparse matrix, optional
            Data covariance. If not given, this will be a dense matrix of
            zeros.

        dvecs : (N, P) float array, optional
            Basis vectors for the noise. The data noise is assumed to contain
            some unknown linear combination of the columns of `dvecs`.

        Returns
        -------
        float

        '''
        y = np.asarray(y, dtype=float)
        assert_shape(y, (None, self.dim), 'y')
        n, dim = y.shape

        d = np.asarray(d, dtype=float)
        assert_shape(d, (n, ), 'd')

        if dcov is None:
            dcov = np.zeros((n, n), dtype=float)
        else:
            dcov = as_sparse_or_array(dcov)
            assert_shape(dcov, (n, n), 'dcov')

        if dvecs is None:
            dvecs = np.zeros((n, 0), dtype=float)
        else:
            dvecs = np.asarray(dvecs, dtype=float)
            assert_shape(dvecs, (n, None), 'dvecs')

        mu = self.mean(y)
        cov = as_sparse_or_array(dcov + self.covariance(y, y))
        vecs = np.hstack((self.basis(y), dvecs))

        out = log_likelihood(d, mu, cov, vecs=vecs)
        return out
Example #4
def sample(mu, cov, use_cholesky=False, count=None):
    '''
    Draws a random sample from the multivariate normal distribution.

    Parameters
    ----------
    mu : (N,) array
        Mean vector.

    cov : (N, N) array or sparse matrix
        Covariance matrix.

    use_cholesky : bool, optional
        Whether to use the Cholesky decomposition or eigenvalue decomposition.
        The former is faster but fails when `cov` is not numerically positive
        definite.

    count : int, optional
        Number of samples to draw.

    Returns
    -------
    (N,) or (count, N) array

    '''
    mu = np.asarray(mu)
    assert_shape(mu, (None, ), 'mu')
    n = mu.shape[0]

    cov = as_sparse_or_array(cov)
    assert_shape(cov, (n, n), 'cov')

    if use_cholesky:
        # draw a sample using a cholesky decomposition. This assumes that `cov`
        # is numerically positive definite (i.e. no small negative eigenvalues
        # from rounding error).
        L = PosDefSolver(cov).L()
        if count is None:
            w = np.random.normal(0.0, 1.0, n)
            u = mu + L.dot(w)
        else:
            w = np.random.normal(0.0, 1.0, (n, count))
            u = (mu[:, None] + L.dot(w)).T

    else:
        # otherwise use an eigenvalue decomposition, ignoring negative
        # eigenvalues. If `cov` is sparse then begrudgingly make it dense.
        cov = as_array(cov)
        vals, vecs = np.linalg.eigh(cov)
        keep = (vals > 0.0)
        vals = np.sqrt(vals[keep])
        vecs = vecs[:, keep]
        if count is None:
            w = np.random.normal(0.0, vals)
            u = mu + vecs.dot(w)
        else:
            w = np.random.normal(0.0, vals[:, None].repeat(count, axis=1))
            u = (mu[:, None] + vecs.dot(w)).T

    return u
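
The eigendecomposition branch relies on the fact that if `cov = V diag(vals) V.T`, then `mu + V.dot(w)` with `w ~ N(0, diag(vals))` has exactly the covariance `cov`. A self-contained sketch of that idea:

import numpy as np

mu = np.zeros(3)
cov = np.array([[2.0, 0.5, 0.0],
                [0.5, 1.0, 0.2],
                [0.0, 0.2, 0.5]])
vals, vecs = np.linalg.eigh(cov)
keep = vals > 0.0
# standard deviations along each retained eigenvector
w = np.random.normal(0.0, np.sqrt(vals[keep]))
u = mu + vecs[:, keep].dot(w)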
Example #5
def _fit(d,s,mu,sigma,p):
  '''
  Conditions the discrete Gaussian process described by *mu*, *sigma*,
  and *p* with the observations *d*, which have uncertainty *s*.
  Returns the mean and standard deviation of the posterior at the
  observation points.
  '''
  n,m = p.shape
  # *A* is the Gaussian process covariance with the noise
  # covariance added
  A = as_sparse_or_array(sigma + _as_covariance(s))
  Ksolver = PartitionedPosDefSolver(A,p)
  # compute mean of the posterior 
  vec1,vec2 = Ksolver.solve(d - mu,np.zeros(m)) 
  u = mu + sigma.dot(vec1) + p.dot(vec2)   
  # compute std. dev. of the posterior
  if sp.issparse(sigma):
    sigma = sigma.A

  mat1,mat2 = Ksolver.solve(sigma.T,p.T)
  del A,Ksolver
  # just compute the diagonal components of the covariance matrix
  # note that A.dot(B).diagonal() == np.sum(A*B.T,axis=1)
  su = np.sqrt(sigma.diagonal() - 
               np.sum(sigma*mat1.T,axis=1) -
               np.sum(p*mat2.T,axis=1))
                 
  return u,su
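
The commented identity is what keeps the standard deviation computation cheap: only the diagonal of the posterior covariance is formed, never the full matrix. A quick numeric check of the identity:

import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(4, 6))
B = rng.normal(size=(6, 4))
print(np.allclose(A.dot(B).diagonal(), np.sum(A*B.T, axis=1)))  # True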
Example #6
def autoclean(t,x,d,sd,
              network_model,
              network_params,
              station_model,
              station_params,
              tol):
  ''' 
  Returns a dataset that has been cleaned of outliers using a data
  editing algorithm.
  '''
  t = np.asarray(t,dtype=float)
  x = np.asarray(x,dtype=float)
  de = np.array(d,dtype=float,copy=True)
  sde = np.array(sd,dtype=float,copy=True)
  diff = np.array([0,0,0])

  net_gp = composite(network_model,network_params,gpnetwork.CONSTRUCTORS)
  sta_gp = composite(station_model,station_params,gpstation.CONSTRUCTORS)

  t_grid,x0_grid = np.meshgrid(t,x[:,0],indexing='ij')
  t_grid,x1_grid = np.meshgrid(t,x[:,1],indexing='ij')
  # flat observation times and positions
  z = np.array([t_grid.ravel(),
                x0_grid.ravel(),
                x1_grid.ravel()]).T

  # mask indicates missing data
  mask = np.isinf(sde)
  zu,du,sdu = z[~mask.ravel()],de[~mask],sde[~mask]
  # Build covariance and basis vectors for the combined process. Do
  # not evaluate at masked points
  sta_sigma,sta_p = station_sigma_and_p(sta_gp,t,mask)
  net_sigma = net_gp._covariance(zu,zu,diff,diff)
  net_p = net_gp._basis(zu,diff)
  # combine station gp with the network gp
  mu = np.zeros(zu.shape[0])  
  sigma = as_sparse_or_array(sta_sigma + net_sigma)
  p = np.hstack((sta_p,net_p))
  del sta_sigma,net_sigma,sta_p,net_p
  # returns the indices of outliers 
  out_idx = outliers(du,sdu,
                     mu=mu,sigma=sigma,p=p,
                     tol=tol)
  # mask the outliers in *de* and *sde*
  r,c = np.nonzero(~mask)
  de[r[out_idx],c[out_idx]] = np.nan
  sde[r[out_idx],c[out_idx]] = np.inf
  return (de,sde)
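
The final indexing step maps outlier indices, which refer to the flattened unmasked entries, back to (row, column) positions in the 2-D arrays. A small demo of that trick:

import numpy as np

mask = np.array([[False,  True],
                 [False, False]])
de = np.array([[1.0, np.nan],
               [2.0, 3.0]])
out_idx = np.array([2])   # the third unmasked entry, i.e. de[1, 1]
r, c = np.nonzero(~mask)
de[r[out_idx], c[out_idx]] = np.nan
print(de)  # de[1, 1] is now nan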
Example #7
def fit(t,x,d,sd,
        network_model,
        network_params,
        station_model,
        station_params):
  ''' 
  Fit network and station processes to the observations, not
  distinguishing between signal and noise.
  '''
  t = np.asarray(t,dtype=float)
  x = np.asarray(x,dtype=float)
  d = np.array(d,dtype=float)
  sd = np.array(sd,dtype=float)
  diff = np.array([0,0,0])

  net_gp = composite(network_model,network_params,gpnetwork.CONSTRUCTORS)
  sta_gp = composite(station_model,station_params,gpstation.CONSTRUCTORS)

  t_grid,x0_grid = np.meshgrid(t,x[:,0],indexing='ij')
  t_grid,x1_grid = np.meshgrid(t,x[:,1],indexing='ij')
  # flat observation times and positions
  z = np.array([t_grid.ravel(),
                x0_grid.ravel(),
                x1_grid.ravel()]).T

  # mask indicates missing data
  mask = np.isinf(sd)
  z,d,sd = z[~mask.ravel()],d[~mask],sd[~mask]

  # Build covariance and basis vectors for the combined process. Do
  # not evaluate at masked points
  sta_sigma,sta_p = station_sigma_and_p(sta_gp,t,mask)
  net_sigma = net_gp._covariance(z,z,diff,diff)
  net_p = net_gp._basis(z,diff)
  # combine station gp with the network gp
  mu = np.zeros(z.shape[0])
  sigma = as_sparse_or_array(sta_sigma + net_sigma)
  p = np.hstack((sta_p,net_p))
  del sta_sigma,net_sigma,sta_p,net_p
  # best fit combination of signal and noise to the observations
  uf,suf = _fit(d,sd,mu,sigma,p)
  # fold back into 2d arrays
  u = np.full((t.shape[0],x.shape[0]),np.nan)
  u[~mask] = uf
  su = np.full((t.shape[0],x.shape[0]),np.inf)
  su[~mask] = suf
  return u,su
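
The meshgrid/ravel construction used here (and in the other examples) turns a time vector and a set of station positions into one flat array of (time, x0, x1) triples, with time varying slowest under 'ij' indexing. A small demo:

import numpy as np

t = np.array([0.0, 1.0])
x = np.array([[10.0, 20.0],
              [30.0, 40.0]])
t_grid, x0_grid = np.meshgrid(t, x[:, 0], indexing='ij')
t_grid, x1_grid = np.meshgrid(t, x[:, 1], indexing='ij')
z = np.array([t_grid.ravel(), x0_grid.ravel(), x1_grid.ravel()]).T
print(z)
# [[ 0. 10. 20.]
#  [ 0. 30. 40.]
#  [ 1. 10. 20.]
#  [ 1. 30. 40.]]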
Example #8
def autoclean(t, x, d, sd, network_model, network_params, station_model,
              station_params, tol):
    '''
    Returns a dataset that has been cleaned of outliers using a data
    editing algorithm.
    '''
    t = np.asarray(t, dtype=float)
    x = np.asarray(x, dtype=float)
    de = np.array(d, dtype=float, copy=True)
    sde = np.array(sd, dtype=float, copy=True)
    diff = np.array([0, 0, 0])

    net_gp = composite(network_model, network_params, gpnetwork.CONSTRUCTORS)
    sta_gp = composite(station_model, station_params, gpstation.CONSTRUCTORS)

    t_grid, x0_grid = np.meshgrid(t, x[:, 0], indexing='ij')
    t_grid, x1_grid = np.meshgrid(t, x[:, 1], indexing='ij')
    # flat observation times and positions
    z = np.array([t_grid.ravel(), x0_grid.ravel(), x1_grid.ravel()]).T

    # mask indicates missing data
    mask = np.isinf(sde)
    zu, du, sdu = z[~mask.ravel()], de[~mask], sde[~mask]
    # Build covariance and basis vectors for the combined process. Do
    # not evaluate at masked points
    sta_sigma, sta_p = station_sigma_and_p(sta_gp, t, mask)
    net_sigma = net_gp._covariance(zu, zu, diff, diff)
    net_p = net_gp._basis(zu, diff)
    # combine station gp with the network gp
    mu = np.zeros(zu.shape[0])
    sigma = as_sparse_or_array(sta_sigma + net_sigma)
    p = np.hstack((sta_p, net_p))
    del sta_sigma, net_sigma, sta_p, net_p
    # returns the indices of outliers
    out_idx = outliers(du, sdu, mu=mu, sigma=sigma, p=p, tol=tol)
    # mask the outliers in *de* and *sde*
    r, c = np.nonzero(~mask)
    de[r[out_idx], c[out_idx]] = np.nan
    sde[r[out_idx], c[out_idx]] = np.inf
    return (de, sde)
Example #9
        def fout(x1, x2, diff1, diff2):
            if not any(diff1) and not any(diff2):
                return as_sparse_or_array(fin(x1, x2))

            elif any(diff1):
                diff1_axis = np.argmax(diff1)
                x1_plus_dx = np.copy(x1)
                x1_plus_dx[:, diff1_axis] += delta
                diff1_minus_one = np.copy(diff1)
                diff1_minus_one[diff1_axis] -= 1
                out = (fout(x1_plus_dx, x2, diff1_minus_one, diff2) -
                       fout(x1, x2, diff1_minus_one, diff2)) / delta
                return out

            else:
                # any(diff2) == True
                diff2_axis = np.argmax(diff2)
                x2_plus_dx = np.copy(x2)
                x2_plus_dx[:, diff2_axis] += delta
                diff2_minus_one = np.copy(diff2)
                diff2_minus_one[diff2_axis] -= 1
                out = (fout(x1, x2_plus_dx, diff1, diff2_minus_one) -
                       fout(x1, x2, diff1, diff2_minus_one)) / delta
                return out
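
Each level of the recursion peels one derivative off `diff1` or `diff2` and replaces it with a forward difference of step `delta`. A self-contained one-dimensional sketch of the same idea:

import numpy as np

delta = 1e-4

def fd(f, x, order):
    # order-th forward-difference derivative of f at the points x
    if order == 0:
        return f(x)
    return (fd(f, x + delta, order - 1) - fd(f, x, order - 1)) / delta

x = np.linspace(0.0, 1.0, 5)
print(fd(np.sin, x, 1))   # approximately cos(x)
print(fd(np.sin, x, 2))   # approximately -sin(x)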
Example #10
def _condition(gp, y, d, dcov, dvecs, ddiff, build_inverse):
    '''
    Returns a conditioned `GaussianProcess`.
    '''
    if gp._mean is None:
        prior_mean = zero_mean
    else:
        prior_mean = gp._mean

    if gp._covariance is None:
        prior_covariance = zero_covariance
        prior_variance = zero_variance
    else:
        prior_covariance = gp._covariance
        if gp._variance is None:
            prior_variance = naive_variance_constructor(prior_covariance)
        else:
            prior_variance = gp._variance

    if gp._basis is None:
        prior_basis = empty_basis
    else:
        prior_basis = gp._basis

    # covariance of the observation points
    cov = dcov + prior_covariance(y, y, ddiff, ddiff)
    cov = as_sparse_or_array(cov)

    # residual at the observation points
    res = d - prior_mean(y, ddiff)

    # basis functions at the observation points
    vecs = prior_basis(y, ddiff)
    if dvecs.shape[1] != 0:
        vecs = np.hstack((vecs, dvecs))

    solver = PartitionedPosDefSolver(cov, vecs, build_inverse=build_inverse)

    # precompute these vectors which are used for `posterior_mean`
    v1, v2 = solver.solve(res)

    del res, cov, vecs

    def posterior_mean(x, diff):
        mu_x = prior_mean(x, diff)
        cov_xy = prior_covariance(x, y, diff, ddiff)
        vecs_x = prior_basis(x, diff)
        if dvecs.shape[1] != 0:
            pad = np.zeros((x.shape[0], dvecs.shape[1]), dtype=float)
            vecs_x = np.hstack((vecs_x, pad))

        out = mu_x + cov_xy.dot(v1) + vecs_x.dot(v2)
        return out

    def posterior_covariance(x1, x2, diff1, diff2):
        cov_x1x2 = prior_covariance(x1, x2, diff1, diff2)
        cov_x1y = prior_covariance(x1, y, diff1, ddiff)
        cov_x2y = prior_covariance(x2, y, diff2, ddiff)
        vecs_x1 = prior_basis(x1, diff1)
        vecs_x2 = prior_basis(x2, diff2)
        if dvecs.shape[1] != 0:
            pad = np.zeros((x1.shape[0], dvecs.shape[1]), dtype=float)
            vecs_x1 = np.hstack((vecs_x1, pad))

            pad = np.zeros((x2.shape[0], dvecs.shape[1]), dtype=float)
            vecs_x2 = np.hstack((vecs_x2, pad))

        m1, m2 = solver.solve(cov_x2y.T, vecs_x2.T)
        out = cov_x1x2 - cov_x1y.dot(m1) - vecs_x1.dot(m2)
        # `out` may either be a matrix or array depending on whether cov_x1x2
        # is sparse or dense. Make the output consistent by converting to array
        out = np.asarray(out)
        return out

    def posterior_variance(x, diff):
        var_x = prior_variance(x, diff)
        cov_xy = prior_covariance(x, y, diff, ddiff)
        vecs_x = prior_basis(x, diff)
        if dvecs.shape[1] != 0:
            pad = np.zeros((x.shape[0], dvecs.shape[1]), dtype=float)
            vecs_x = np.hstack((vecs_x, pad))

        m1, m2 = solver.solve(cov_xy.T, vecs_x.T)
        # Efficiently get the diagonals of cov_xy.dot(m1) and vecs_x.dot(m2)
        if sp.issparse(cov_xy):
            diag1 = cov_xy.multiply(m1.T).sum(axis=1).A[:, 0]
        else:
            diag1 = np.einsum('ij, ji -> i', cov_xy, m1)

        diag2 = np.einsum('ij, ji -> i', vecs_x, m2)
        out = var_x - diag1 - diag2
        return out

    out = GaussianProcess(posterior_mean,
                          posterior_covariance,
                          variance=posterior_variance,
                          dim=y.shape[1],
                          differentiable=True)
    return out
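
For the simplest case (zero prior mean, no basis vectors, dense matrices), `posterior_mean` and `posterior_covariance` reduce to the textbook conditioning formulas. A self-contained numpy sketch of that special case:

import numpy as np

def se_cov(a, b):
    # squared-exponential covariance between two 1-D point sets
    return np.exp(-(a[:, None] - b[None, :])**2)

y = np.array([0.0, 1.0, 2.0])      # observation points
d = np.array([0.1, 0.8, 0.2])      # observed values
C = 0.01*np.eye(3)                 # data noise covariance
x = np.linspace(0.0, 2.0, 5)       # evaluation points

Kinv = np.linalg.inv(se_cov(y, y) + C)
post_mean = se_cov(x, y).dot(Kinv).dot(d)
post_cov = se_cov(x, x) - se_cov(x, y).dot(Kinv).dot(se_cov(y, x))
post_sd = np.sqrt(np.diag(post_cov))

The solver-based implementation above avoids the explicit inverse and additionally handles the basis vectors; this sketch trades that robustness for brevity.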
Example #11
def added_covariance(x1, x2, diff1, diff2):
     out = as_sparse_or_array(
         gp1._covariance(x1, x2, diff1, diff2) +
         gp2._covariance(x1, x2, diff1, diff2))
     return out
Example #12
def outliers(d, dsigma, pcov, pmu=None, pvecs=None, tol=4.0, maxitr=50):
    '''
    Uses a data editing algorithm to identify outliers in `d`. Outliers are
    considered to be the data that are abnormally inconsistent with a
    multivariate normal distribution with mean `pmu`, covariance `pcov`, and
    basis vectors `pvecs`.

    The data editing algorithm first conditions the prior with the
    observations, then compares each normalized residual (`d` minus the
    posterior mean, divided by `dsigma`) to the RMS of the residuals. Data with
    residuals greater than `tol` times the RMS are flagged as outliers. The
    process is then repeated using only the subset of `d` that was not flagged.
    The algorithm stops when an iteration detects no new outliers.

    Parameters
    ----------
    d : (N,) float array
        Observations.

    dsigma : (N,) float array
        One standard deviation uncertainty on the observations.

    pcov : (N, N) array or sparse matrix
        Covariance of the prior at the observation points.

    pmu : (N,) float array, optional
        Mean of the prior at the observation points. Defaults to zeros.

    pvecs : (N, P) float array, optional
        Basis functions of the prior evaluated at the observation points.
        Defaults to an (N, 0) array.

    tol : float, optional
        Outlier tolerance. Smaller values make the algorithm more likely to
        identify outliers. A good value is 4.0, and it should not be set any
        lower than 2.0.

    maxitr : int, optional
        Maximum number of iterations.

    Returns
    -------
    (N,) bool array
        Array indicating which data are outliers.

    '''
    d = np.asarray(d, dtype=float)
    assert_shape(d, (None, ), 'd')
    n = d.shape[0]

    dsigma = np.asarray(dsigma, dtype=float)
    assert_shape(dsigma, (n, ), 'dsigma')

    pcov = as_sparse_or_array(pcov, dtype=float)
    assert_shape(pcov, (n, n), 'pcov')

    if pmu is None:
        pmu = np.zeros((n, ), dtype=float)
    else:
        pmu = np.asarray(pmu, dtype=float)
        assert_shape(pmu, (n, ), 'pmu')

    if pvecs is None:
        pvecs = np.zeros((n, 0), dtype=float)
    else:
        pvecs = np.asarray(pvecs, dtype=float)
        assert_shape(pvecs, (n, None), 'pvecs')

    # Total number of outlier detection iterations completed thus far
    itr = 0
    inliers = np.ones(n, dtype=bool)
    while True:
        LOGGER.debug('Starting iteration %d of outlier detection.' % (itr + 1))
        # Remove rows and cols corresponding to the outliers
        pcov_i = pcov[:, inliers][inliers, :]
        pmu_i = pmu[inliers]
        pvecs_i = pvecs[inliers]
        d_i = d[inliers]
        dsigma_i = dsigma[inliers]
        if sp.issparse(pcov):
            pcov_i = (pcov_i + sp.diags(dsigma_i**2)).tocsc()
        else:
            pcov_i = pcov_i + np.diag(dsigma_i**2)

        # Find the mean of the posterior
        solver = PartitionedPosDefSolver(pcov_i, pvecs_i)
        v1, v2 = solver.solve(d_i - pmu_i)
        fit = pmu + pcov[:, inliers].dot(v1) + pvecs.dot(v2)

        # find new outliers based on the misfit
        res = np.abs(fit - d) / dsigma
        rms = np.sqrt(np.mean(res[inliers]**2))
        new_inliers = res < tol * rms
        if np.all(inliers == new_inliers):
            break
        else:
            inliers = new_inliers
            itr += 1
            if itr == maxitr:
                warnings.warn('Reached the maximum number of iterations')
                break

    LOGGER.debug('Detected %s outliers out of %s observations' %
                 (inliers.size - inliers.sum(), inliers.size))

    outliers = ~inliers
    return outliers
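
Hypothetical usage on synthetic data with a single corrupted observation, assuming this function and its helpers are importable:

import numpy as np

y = np.linspace(0.0, 10.0, 50)
pcov = np.exp(-(y[:, None] - y[None, :])**2)   # smooth prior covariance
d = np.sin(y)
d[20] += 5.0                                   # inject one outlier
dsigma = 0.1*np.ones(50)
flagged = outliers(d, dsigma, pcov)
print(np.nonzero(flagged)[0])                  # expected to include 20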
Example #13
def log_likelihood(d, mu, cov, vecs=None):
    '''
    Returns the log likelihood of observing `d` from a multivariate normal
    distribution with mean `mu` and covariance `cov`.

    When `vecs` is specified, the restricted log likelihood is returned. The
    restricted log likelihood is the probability of observing `R.dot(d)` from a
    normally distributed random vector with mean `R.dot(mu)` and covariance
    `R.dot(cov).dot(R.T)`, where `R` is a matrix whose rows are orthogonal to
    the columns of `vecs`. See [1] or [2] for more information.

    Parameters
    ----------
    d : (N,) array
        Observation vector.

    mu : (N,) array
        Mean vector.

    cov : (N, N) array or sparse matrix
        Covariance matrix.

    vecs : (N, M) array, optional
        Unconstrained basis vectors.

    Returns
    -------
    float

    References
    ----------
    [1] Harville D. (1974). Bayesian Inference of Variance Components Using
    Only Error Contrasts. Biometrika.

    [2] Cressie N. (1993). Statistics for Spatial Data. John Wiley & Sons.

    '''
    d = np.asarray(d, dtype=float)
    assert_shape(d, (None, ), 'd')
    n = d.shape[0]

    mu = np.asarray(mu, dtype=float)
    assert_shape(mu, (n, ), 'mu')

    cov = as_sparse_or_array(cov)
    assert_shape(cov, (n, n), 'cov')

    if vecs is None:
        vecs = np.zeros((n, 0), dtype=float)
    else:
        vecs = np.asarray(vecs, dtype=float)
        assert_shape(vecs, (n, None), 'vecs')

    m = vecs.shape[1]

    A = PosDefSolver(cov)
    B = A.solve_L(vecs)
    C = PosDefSolver(B.T.dot(B))
    D = PosDefSolver(vecs.T.dot(vecs))

    a = A.solve_L(d - mu)
    b = C.solve_L(B.T.dot(a))

    out = 0.5 * (D.log_det() - A.log_det() - C.log_det() - a.T.dot(a) +
                 b.T.dot(b) - (n - m) * np.log(2 * np.pi))
    return out
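
With `vecs=None` this reduces to the ordinary multivariate normal log density, which makes for an easy cross-check against scipy (assuming this function and its helpers are importable):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(1)
n = 5
mu = rng.normal(size=n)
A = rng.normal(size=(n, n))
cov = A.dot(A.T) + n*np.eye(n)   # symmetric positive definite
d = rng.normal(size=n)

print(log_likelihood(d, mu, cov))
print(multivariate_normal.logpdf(d, mean=mu, cov=cov))  # same value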
Example #14
    def condition(self,
                  y,
                  d,
                  dcov=None,
                  dvecs=None,
                  ddiff=None,
                  build_inverse=False):
        '''
        Returns a `GaussianProcess` conditioned on the data.

        Parameters
        ----------
        y : (N, D) float array
            Observation points.

        d : (N,) float array
            Observed values at `y`.

        dcov : (N, N) array or sparse matrix, optional
            Covariance of the data noise. Defaults to a dense array of zeros.

        dvecs : (N, P) array, optional
            Data noise basis vectors. The data noise is assumed to contain some
            unknown linear combination of the columns of `dvecs`.

        ddiff : (D,) int array, optional
            Derivative of the observations. For example, use (1,) if the
            observations are 1-D and should constrain the slope.

        build_inverse : bool, optional
            Whether to construct the inverse matrices rather than just the
            factors.

        Returns
        -------
        GaussianProcess

        '''
        y = np.asarray(y, dtype=float)
        assert_shape(y, (None, self.dim), 'y')
        n, dim = y.shape

        d = np.asarray(d, dtype=float)
        assert_shape(d, (n, ), 'd')

        if dcov is None:
            dcov = np.zeros((n, n), dtype=float)
        else:
            dcov = as_sparse_or_array(dcov)
            assert_shape(dcov, (n, n), 'dcov')

        if dvecs is None:
            dvecs = np.zeros((n, 0), dtype=float)
        else:
            dvecs = np.asarray(dvecs, dtype=float)
            assert_shape(dvecs, (n, None), 'dvecs')

        if ddiff is None:
            ddiff = np.zeros(dim, dtype=int)
        else:
            ddiff = np.asarray(ddiff, dtype=int)
            assert_shape(ddiff, (dim, ), 'ddiff')

        out = _condition(self,
                         y,
                         d,
                         dcov,
                         dvecs,
                         ddiff,
                         build_inverse=build_inverse)
        return out
Example #15
def strain(t,x,d,sd,
           network_prior_model,
           network_prior_params,
           network_noise_model,
           network_noise_params,
           station_noise_model,
           station_noise_params,
           out_t,out_x,rate,
           covariance):
  ''' 
  Computes deformation gradients from displacement data.
  '''  
  t = np.asarray(t,dtype=float)
  x = np.asarray(x,dtype=float)
  d = np.array(d,dtype=float)
  sd = np.array(sd,dtype=float)
  diff = np.array([0,0,0])

  t_grid,x0_grid = np.meshgrid(t,x[:,0],indexing='ij')  
  t_grid,x1_grid = np.meshgrid(t,x[:,1],indexing='ij')  
  # flat observation times and positions
  z = np.array([t_grid.ravel(),
                x0_grid.ravel(),
                x1_grid.ravel()]).T

  t_grid,x0_grid = np.meshgrid(out_t,out_x[:,0],indexing='ij')  
  t_grid,x1_grid = np.meshgrid(out_t,out_x[:,1],indexing='ij')  
  # flat observation times and positions
  out_z = np.array([t_grid.ravel(),
                    x0_grid.ravel(),
                    x1_grid.ravel()]).T

  prior_gp = composite(network_prior_model,network_prior_params,gpnetwork.CONSTRUCTORS)
  noise_gp = composite(network_noise_model,network_noise_params,gpnetwork.CONSTRUCTORS)
  sta_gp   = composite(station_noise_model,station_noise_params,gpstation.CONSTRUCTORS)

  # find missing data
  mask = np.isinf(sd)
  # get unmasked data and uncertainties
  z,d,sd = z[~mask.ravel()],d[~mask],sd[~mask]
  # build noise covariance and basis vectors
  sta_sigma,sta_p = station_sigma_and_p(sta_gp,t,mask)
  # add data noise to the station noise
  obs_sigma = _as_covariance(sd)
  sta_sigma = as_sparse_or_array(sta_sigma + obs_sigma)
  # make network noise
  net_sigma = noise_gp._covariance(z,z,diff,diff)
  net_p = noise_gp._basis(z,diff)
  # combine noise processes
  noise_sigma = as_sparse_or_array(sta_sigma + net_sigma)
  noise_p = np.hstack((sta_p,net_p))
  del sta_sigma,net_sigma,obs_sigma,sta_p,net_p
  # condition the prior with the data
  post_gp = prior_gp.condition(z,d,sigma=noise_sigma,p=noise_p)
  if rate:
    dudx_gp = post_gp.differentiate((1,1,0)) # x derivative of velocity
    dudy_gp = post_gp.differentiate((1,0,1)) # y derivative of velocity

  else:  
    dudx_gp = post_gp.differentiate((0,1,0)) # x derivative of displacement
    dudy_gp = post_gp.differentiate((0,0,1)) # y derivative of displacement

  if covariance:
    # Evaluate the mean and covariances of the posterior
    dudx = dudx_gp.mean(out_z)
    cdudx = dudx_gp.covariance(out_z,out_z)
    sdudx = np.sqrt(np.diag(cdudx))

    dudy = dudy_gp.mean(out_z)
    cdudy = dudy_gp.covariance(out_z,out_z)
    sdudy = np.sqrt(np.diag(cdudy))

    dudx  = dudx.reshape((out_t.shape[0],out_x.shape[0]))
    sdudx = sdudx.reshape((out_t.shape[0],out_x.shape[0]))
    cdudx = cdudx.reshape((out_t.shape[0],out_x.shape[0],
                           out_t.shape[0],out_x.shape[0]))
    dudy  = dudy.reshape((out_t.shape[0],out_x.shape[0]))
    sdudy = sdudy.reshape((out_t.shape[0],out_x.shape[0]))
    cdudy = cdudy.reshape((out_t.shape[0],out_x.shape[0],
                           out_t.shape[0],out_x.shape[0]))
    out = (dudx,sdudx,cdudx,dudy,sdudy,cdudy)

  else:
    # Just evaluate the mean and standard deviations of the posterior
    dudx,sdudx = dudx_gp.meansd(out_z,chunk_size=1000)
    dudy,sdudy = dudy_gp.meansd(out_z,chunk_size=1000)

    dudx = dudx.reshape((out_t.shape[0],out_x.shape[0]))
    sdudx = sdudx.reshape((out_t.shape[0],out_x.shape[0]))
    dudy = dudy.reshape((out_t.shape[0],out_x.shape[0]))
    sdudy = sdudy.reshape((out_t.shape[0],out_x.shape[0]))
    out = (dudx,sdudx,dudy,sdudy)
        
  return out
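
The derivative tuples follow the (t, x0, x1) coordinate order of `z`: (0, 1, 0) is the x gradient of the displacement, while (1, 1, 0) adds a time derivative and therefore gives the x gradient of the velocity. A quick finite-difference illustration on a known function:

import numpy as np

def u(t, x):
    # toy displacement field: du/dx = 2*t*x, d2u/dtdx = 2*x
    return t*x**2

h = 1e-4
t0, x0 = 0.5, 1.5
dudx = (u(t0, x0 + h) - u(t0, x0))/h                # ~ 2*t0*x0 = 1.5
dvdx = ((u(t0 + h, x0 + h) - u(t0 + h, x0))
        - (u(t0, x0 + h) - u(t0, x0)))/h**2         # ~ 2*x0 = 3.0
print(dudx, dvdx)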
Example #16
def strain(t, x, d, sd, network_prior_model, network_prior_params,
           network_noise_model, network_noise_params, station_noise_model,
           station_noise_params, out_t, out_x, rate, covariance):
    '''
    Computes deformation gradients from displacement data.
    '''
    t = np.asarray(t, dtype=float)
    x = np.asarray(x, dtype=float)
    d = np.array(d, dtype=float)
    sd = np.array(sd, dtype=float)
    diff = np.array([0, 0, 0])

    t_grid, x0_grid = np.meshgrid(t, x[:, 0], indexing='ij')
    t_grid, x1_grid = np.meshgrid(t, x[:, 1], indexing='ij')
    # flat observation times and positions
    z = np.array([t_grid.ravel(), x0_grid.ravel(), x1_grid.ravel()]).T

    t_grid, x0_grid = np.meshgrid(out_t, out_x[:, 0], indexing='ij')
    t_grid, x1_grid = np.meshgrid(out_t, out_x[:, 1], indexing='ij')
    # flat observation times and positions
    out_z = np.array([t_grid.ravel(), x0_grid.ravel(), x1_grid.ravel()]).T

    prior_gp = composite(network_prior_model, network_prior_params,
                         gpnetwork.CONSTRUCTORS)
    noise_gp = composite(network_noise_model, network_noise_params,
                         gpnetwork.CONSTRUCTORS)
    sta_gp = composite(station_noise_model, station_noise_params,
                       gpstation.CONSTRUCTORS)

    # find missing data
    mask = np.isinf(sd)
    # get unmasked data and uncertainties
    z, d, sd = z[~mask.ravel()], d[~mask], sd[~mask]
    # build noise covariance and basis vectors
    sta_sigma, sta_p = station_sigma_and_p(sta_gp, t, mask)
    # add data noise to the station noise
    obs_sigma = _as_covariance(sd)
    sta_sigma = as_sparse_or_array(sta_sigma + obs_sigma)
    # make network noise
    net_sigma = noise_gp._covariance(z, z, diff, diff)
    net_p = noise_gp._basis(z, diff)
    # combine noise processes
    noise_sigma = as_sparse_or_array(sta_sigma + net_sigma)
    noise_p = np.hstack((sta_p, net_p))
    del sta_sigma, net_sigma, obs_sigma, sta_p, net_p
    # condition the prior with the data
    post_gp = prior_gp.condition(z, d, sigma=noise_sigma, p=noise_p)
    if rate:
        dudx_gp = post_gp.differentiate((1, 1, 0))  # x derivative of velocity
        dudy_gp = post_gp.differentiate((1, 0, 1))  # y derivative of velocity

    else:
        # x derivative of displacement
        dudx_gp = post_gp.differentiate((0, 1, 0))
        # y derivative of displacement
        dudy_gp = post_gp.differentiate((0, 0, 1))

    if covariance:
        # Evaluate the mean and covariances of the posterior
        dudx = dudx_gp.mean(out_z)
        cdudx = dudx_gp.covariance(out_z, out_z)
        sdudx = np.sqrt(np.diag(cdudx))

        dudy = dudy_gp.mean(out_z)
        cdudy = dudy_gp.covariance(out_z, out_z)
        sdudy = np.sqrt(np.diag(cdudy))

        dudx = dudx.reshape((out_t.shape[0], out_x.shape[0]))
        sdudx = sdudx.reshape((out_t.shape[0], out_x.shape[0]))
        cdudx = cdudx.reshape(
            (out_t.shape[0], out_x.shape[0], out_t.shape[0], out_x.shape[0]))
        dudy = dudy.reshape((out_t.shape[0], out_x.shape[0]))
        sdudy = sdudy.reshape((out_t.shape[0], out_x.shape[0]))
        cdudy = cdudy.reshape(
            (out_t.shape[0], out_x.shape[0], out_t.shape[0], out_x.shape[0]))
        out = (dudx, sdudx, cdudx, dudy, sdudy, cdudy)

    else:
        # Just evaluate the mean and standard deviations of the posterior
        dudx, sdudx = dudx_gp.meansd(out_z, chunk_size=1000)
        dudy, sdudy = dudy_gp.meansd(out_z, chunk_size=1000)

        dudx = dudx.reshape((out_t.shape[0], out_x.shape[0]))
        sdudx = sdudx.reshape((out_t.shape[0], out_x.shape[0]))
        dudy = dudy.reshape((out_t.shape[0], out_x.shape[0]))
        sdudy = sdudy.reshape((out_t.shape[0], out_x.shape[0]))
        out = (dudx, sdudx, dudy, sdudy)

    return out