Пример #1
0
 def test_cholesky_solve_and_invert(self):
     """Check cholesky_solve_and_invert on a random positive definite system.

     A is built as a sum of n outer products of random vectors, a random X
     is drawn and B = A.X is formed.  Both outputs of the routine are then
     verified: the solution must reproduce X, and A times the returned
     inverse must be the identity matrix.
     """
     n = 12
     # sum of n rank-one outer products: symmetric, and positive definite
     # as long as the n random vectors are linearly independent
     A = np.zeros((n, n))
     for _ in range(n):
         H = np.random.random(n)
         A += np.outer(H, H)
     # random X
     X = np.random.random(n)
     # compute B
     B = A.dot(X)
     # solve for X given A and B
     Xs, Ai = cholesky_solve_and_invert(A, B)
     # check solution (squared norm of the difference must vanish)
     dx = Xs - X
     self.assertAlmostEqual(np.inner(dx, dx), 0.)
     # check inverse
     Id = A.dot(Ai)
     # test some non-diagonal elements
     self.assertAlmostEqual(Id[0, 1], 0.)
     self.assertAlmostEqual(Id[1, 0], 0.)
     self.assertAlmostEqual(Id[0, -1], 0.)
     self.assertAlmostEqual(Id[-1, 0], 0.)
     # test diagonal: squared distance of diag(Id) to a vector of ones
     delta = np.diag(Id) - np.ones((n))
     d = np.inner(delta, delta)
     self.assertAlmostEqual(d, 0.)
Пример #2
0
 def test_cholesky_solve_and_invert(self):
     """Exercise cholesky_solve_and_invert on a random positive definite matrix.

     The matrix A is a sum of n outer products of random vectors.  With a
     random vector X and B = A.X, the test checks that the returned solution
     matches X and that the product of A with the returned inverse is the
     identity (a few off-diagonal terms plus the whole diagonal).
     """
     n = 12
     # create a random positive definite matrix A
     A = np.zeros((n, n))
     for _ in range(n):
         H = np.random.random(n)
         A += np.outer(H, H)
     # random X and the corresponding right-hand side
     X = np.random.random(n)
     B = A.dot(X)
     # solve for X given A and B
     Xs, Ai = cholesky_solve_and_invert(A, B)
     # the solved vector must match the one used to build B
     diff = Xs - X
     self.assertAlmostEqual(np.inner(diff, diff), 0.)
     # check inverse
     Id = A.dot(Ai)
     # test some non-diagonal elements
     self.assertAlmostEqual(Id[0, 1], 0.)
     self.assertAlmostEqual(Id[1, 0], 0.)
     self.assertAlmostEqual(Id[0, -1], 0.)
     self.assertAlmostEqual(Id[-1, 0], 0.)
     # test diagonal
     delta = np.diag(Id) - np.ones((n))
     d = np.inner(delta, delta)
     self.assertAlmostEqual(d, 0.)
Пример #3
0
def compute_sky(frame, nsig_clipping=4.):
    """Compute a sky model.

    The fit uses the rows of `frame` whose fibermap OBJTYPE is 'SKY'.
    Input flux are expected to be flatfielded; this is not checked here.

    A single deconvolved sky spectrum is fitted to all sky fibers through
    their resolution matrices, with iterative outlier rejection (at most
    20 passes).  The result is then re-convolved with the resolution of
    every spectrum of the frame.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    returns SkyModel object with attributes wave, flux, ivar, mask
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # working copies restricted to the sky fibers; the ivar copy is the one
    # that gets zeroed when a pixel is rejected
    current_ivar = frame.ivar[skyfibers].copy()
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)
    chi2_cut = nsig_clipping**2

    nout_tot = 0
    for iteration in range(20):

        # normal equations A.sky = B accumulated over the sky fibers
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))
        # reusable diagonal sparse matrix holding sqrt(ivar) of one fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))

            SD.setdiag(sqrtw[fiber])
            # each row r of the resolution matrix is multiplied by sqrtw[r]
            sqrtwR = SD * Rsky[fiber]

            A = A + (sqrtwR.T * sqrtwR).tocsr()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)

        skyflux = cholesky_solve(A.todense(), B)

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            model = Rsky[fiber].dot(skyflux)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - model)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # first pass: for every wavelength that has at least one outlier,
            # drop only the single worst entry among the fibers
            nout_per_wave = np.sum(chi2 > chi2_cut, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            worst_entries = np.argmax(chi2[:, selection], axis=0)
            current_ivar[worst_entries, selection] = 0
            sqrtw[worst_entries, selection] = 0
            sqrtwflux[worst_entries, selection] = 0
            nout_iter += len(selection)
        else:
            # later passes: remove all outliers at once
            bad = (chi2 > chi2_cut)
            keep = ~bad
            current_ivar *= keep
            sqrtw *= keep
            sqrtwflux *= keep
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = sum_chi2 / ndf if ndf > 0 else 0.
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        # converged when nothing was rejected in this pass
        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # solve once again to get deconvolved sky variance
    skyflux, skycovar = cholesky_solve_and_invert(A.todense(), B)

    # Use diagonal of skycovar convolved with mean resolution of all fibers:
    # first compute the average resolution ...
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    R = Resolution(mean_res_data)
    # ... then the convolved covariance, its diagonal, and the inverse
    # variance (0 wherever the variance is not strictly positive)
    cskycovar = R.dot(skycovar).dot(R.T.todense())
    cskyvar = np.diagonal(cskycovar)
    cskyivar = (cskyvar > 0) / (cskyvar + (cskyvar == 0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Convolved sky: one row per spectrum, using its own resolution
    cskyflux = np.zeros(frame.flux.shape)
    for spec in range(frame.nspec):
        cskyflux[spec] = frame.R[spec].dot(skyflux)

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask, nrej=nout_tot)
Пример #4
0
def polynomial_fit(z, ez, xx, yy, degx, degy):
    """
    Computes a 2D polynomial fit of z as a function of (xx,yy) of degrees degx and degy

    The weighted linear least-square fit is iterated (at most 100 times).
    At each pass the single worst residual is masked if it exceeds the
    rejection cut; when the median-based reduced chi2 estimate is still
    above 1, an error floor (added in quadrature to ez) is raised by 0.002.
    The loop stops when that estimate is <= 1 and nothing was rejected.

    NOTE(review): the rejection step indexes the arrays with the flat index
    returned by np.argmax, which designates the right element only for 1D
    inputs -- confirm that callers always pass 1D arrays.

    Args:
        z : array of values to fit
        ez : array of same shape as z, uncertainties on z
        xx : array of same shape as z, x coordinates
        yy : array of same shape as z, y coordinates
        degx : int (>=0), polynomial degree along x
        degy : int (>=0), polynomial degree along y

    Returns:
        coeff : 1D array of size (degx+1)*(degy+1) with polynomial coefficients (as defined by routine monomials)
        covariance : 2D array of covariance of coeff
        error_floor : float , extra uncertainty needed to get chi2/ndf=1
        polval : array with values of pol(xx,yy) from the last pass of the loop
        mask : int array of same shape as z, 1 for points kept and 0 for points rejected
    """
    M = monomials(x=xx, y=yy, degx=degx, degy=degy)

    error_floor = 0.

    npar = M.shape[0]
    A = np.zeros((npar, npar))
    B = np.zeros((npar))

    mask = np.ones(z.shape).astype(int)
    for loop in range(100):  # loop to increase errors

        # weights: inverse variance including the error floor, 0 if masked
        w = 1. / (ez**2 + error_floor**2)
        w[mask == 0] = 0.

        # fill the normal equations; every entry of A and B is overwritten
        # here, so there is no need to reset them between passes
        for k in range(npar):
            B[k] = np.sum(w * z * M[k])
            wMk = w * M[k]
            for l in range(k + 1):
                A[k, l] = A[l, k] = np.sum(wMk * M[l])
        coeff = cholesky_solve(A, B)
        polval = M.T.dot(coeff)

        # compute rchi2 with median
        ndata = np.sum(w > 0)
        rchi2 = 1.4826 * np.median(
            np.sqrt(w) * np.abs(z - polval)) * ndata / float(ndata - npar)

        # reject huge outliers, one at a time
        nbad = 0
        rvar = w * (z - polval)**2
        worst = np.argmax(rvar)
        # cap rchi2 from below if starting point is very bad
        if rvar[worst] > 25 * max(rchi2, 1.2):
            mask[worst] = 0
            nbad = 1

        # raise the error floor only once rejection has stalled (or after
        # a few passes), so that outliers do not inflate it
        if rchi2 > 1:
            if nbad == 0 or loop > 5:
                error_floor += 0.002

        if rchi2 <= 1. and nbad == 0:
            break

    # rerun chol. solve to get covariance
    coeff, covariance = cholesky_solve_and_invert(A, B)

    return coeff, covariance, error_floor, polval, mask
Пример #5
0
def polynomial_fit(z, ez, xx, yy, degx, degy):
    """
    Computes a 2D polynomial fit of z as a function of (xx,yy) of degrees degx and degy

    The fit is a weighted linear least-square solved by Cholesky
    decomposition and repeated up to 100 times.  Each pass may mask the
    single worst residual (if above the rejection cut) and/or increase by
    0.002 an error floor added in quadrature to ez, until the median-based
    reduced chi2 estimate is <= 1 with no new rejection.

    NOTE(review): np.argmax returns a flat index that is used directly to
    index rvar and mask; this is only correct for 1D inputs -- confirm with
    callers.

    Args:
        z : array of values to fit
        ez : array of same shape as z, uncertainties on z
        xx : array of same shape as z, x coordinates
        yy : array of same shape as z, y coordinates
        degx : int (>=0), polynomial degree along x
        degy : int (>=0), polynomial degree along y

    Returns:
        coeff : 1D array of size (degx+1)*(degy+1) with polynomial coefficients (as defined by routine monomials)
        covariance : 2D array of covariance of coeff
        error_floor : float , extra uncertainty needed to get chi2/ndf=1
        polval : array with values of pol(xx,yy) from the last pass of the loop
        mask : int array of same shape as z, 1 for points kept and 0 for points rejected
    """
    M = monomials(x=xx, y=yy, degx=degx, degy=degy)

    error_floor = 0.

    npar = M.shape[0]
    A = np.zeros((npar, npar))
    B = np.zeros((npar))

    mask = np.ones(z.shape).astype(int)
    for loop in range(100):  # loop to increase errors

        w = 1. / (ez**2 + error_floor**2)
        w[mask == 0] = 0.

        # A (symmetric) and B are fully overwritten at each pass
        for k in range(npar):
            B[k] = np.sum(w * z * M[k])
            weighted_mk = w * M[k]
            for l in range(k + 1):
                A[k, l] = A[l, k] = np.sum(weighted_mk * M[l])
        coeff = cholesky_solve(A, B)
        polval = M.T.dot(coeff)

        # compute rchi2 with median
        ndata = np.sum(w > 0)
        rchi2 = 1.4826 * np.median(np.sqrt(w) * np.abs(z - polval)) * ndata / float(ndata - npar)

        # reject huge outliers (at most one per pass)
        nbad = 0
        rvar = w * (z - polval)**2
        worst = np.argmax(rvar)
        if rvar[worst] > 25 * max(rchi2, 1.2):  # cap rchi2 if starting point is very bad
            mask[worst] = 0
            nbad = 1

        # only grow the error floor when no outlier was removed in this
        # pass, or after the first few passes
        if rchi2 > 1:
            if nbad == 0 or loop > 5:
                error_floor += 0.002

        if rchi2 <= 1. and nbad == 0:
            break

    # rerun chol. solve to get covariance
    coeff, covariance = cholesky_solve_and_invert(A, B)

    return coeff, covariance, error_floor, polval, mask
Пример #6
0
def compute_sky(frame, nsig_clipping=4.):
    """Compute a sky model.

    Only the rows of `frame` flagged OBJTYPE == 'SKY' in its fibermap
    enter the fit.  Input flux are expected to be flatfielded!
    We don't check this in this routine.

    One deconvolved sky spectrum is solved for from all the sky fibers
    (each seen through its own resolution matrix), rejecting outliers
    iteratively for at most 20 passes, and is finally convolved back with
    the resolution of each spectrum of the frame.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    returns SkyModel object with attributes wave, flux, ivar, mask
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # ivar is copied because rejected pixels are zeroed in place below
    current_ivar = frame.ivar[skyfibers].copy()
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)
    threshold = nsig_clipping**2

    nout_tot = 0
    for iteration in range(20):

        # linear system A.sky = B, summed over sky fibers
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))
        # diagonal sparse matrix refilled with sqrt(ivar) of each fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))

            SD.setdiag(sqrtw[fiber])
            # rows of the resolution matrix scaled by sqrt(ivar)
            sqrtwR = SD * Rsky[fiber]

            A = A + (sqrtwR.T * sqrtwR).tocsr()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)

        skyflux = cholesky_solve(A.todense(), B)

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            convolved = Rsky[fiber].dot(skyflux)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - convolved)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove the worst outlier per wave: find the waves with
            # at least one outlier, then the worst fiber for each of them
            nout_per_wave = np.sum(chi2 > threshold, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            worst_fibers = np.argmax(chi2[:, selection], axis=0)
            current_ivar[worst_fibers, selection] = 0
            sqrtw[worst_fibers, selection] = 0
            sqrtwflux[worst_fibers, selection] = 0
            nout_iter += len(selection)
        else:
            # remove all of them at once
            bad = (chi2 > threshold)
            good = ~bad
            current_ivar *= good
            sqrtw *= good
            sqrtwflux *= good
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = sum_chi2 / ndf if ndf > 0 else 0.
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" % (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        # stop as soon as a pass rejects nothing
        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # solve once again to get deconvolved sky variance
    skyflux, skycovar = cholesky_solve_and_invert(A.todense(), B)

    # Use diagonal of skycovar convolved with mean resolution of all fibers
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    R = Resolution(mean_res_data)
    # convolved covariance -> variance -> inverse variance, which is set to
    # 0 wherever the variance is not strictly positive
    cskycovar = R.dot(skycovar).dot(R.T.todense())
    cskyvar = np.diagonal(cskycovar)
    cskyivar = (cskyvar > 0) / (cskyvar + (cskyvar == 0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Convolved sky, using the resolution of each spectrum
    cskyflux = np.zeros(frame.flux.shape)
    for spec in range(frame.nspec):
        cskyflux[spec] = frame.R[spec].dot(skyflux)

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask,
                    nrej=nout_tot)
Пример #7
0
def chi2_of_pca_on_line_ratios(list_of_results, lines, pca_on_line_ratios):
    """Compute a chi2 penalty per result from a PCA prior on line flux ratios.

    For each result, the flux ratios of all pairs (l, m > l) of the prior
    lines are computed, rescaled to best match the prior mean ratios, and the
    residuals are projected on the PCA components.  The chi2 compares the
    fitted coefficients with the prior mean/rms and penalizes coefficients
    outside of the [min_coef, max_coef] range.

    Args:
        list_of_results : sequence of dict-like results with entries
            "FLUX_%dA" and "FLUX_ERR_%dA" for each line of the prior
        lines : unused, kept for interface compatibility
        pca_on_line_ratios : dict-like with entries "lines",
            "mean_flux_ratios", "components", "mean_coef", "rms_coef",
            "min_coef" and "max_coef"; the ratio arrays are assumed to be
            ordered like the (l, m > l) pair loop below -- TODO confirm

    Returns:
        1D array with one chi2 per result (0 for results for which the prior
        cannot be evaluated).  NOTE: if the linear solve fails for any
        result, the scalar 1000.0 is returned instead of an array; this
        legacy behavior is kept for callers.
    """
    nres = len(list_of_results)
    chi2 = np.zeros((nres))
    log = get_logger()
    log.debug("starting")

    pca_lines = np.array(pca_on_line_ratios["lines"])
    log.debug("lines of prior = %s" % pca_lines)

    pca_mean_flux_ratios = np.array(pca_on_line_ratios["mean_flux_ratios"])
    pca_components = np.array(pca_on_line_ratios["components"])
    pca_mean_coef = np.array(pca_on_line_ratios["mean_coef"])
    pca_rms_coef = np.array(pca_on_line_ratios["rms_coef"])
    pca_min_coef = np.array(pca_on_line_ratios["min_coef"])
    pca_max_coef = np.array(pca_on_line_ratios["max_coef"])

    nlines = len(pca_lines)
    npairs = nCk(nlines, 2)

    for res_index, result in enumerate(list_of_results):
        # line flux ratios, one slot per pair of lines
        flux_ratios = np.zeros((npairs))
        ivar = np.zeros((npairs))
        pair = -1
        for l in range(nlines):
            for m in range(l + 1, nlines):
                # the slot index must advance for every pair, including the
                # skipped ones, otherwise the following ratios would be
                # compared with the prior values of other pairs
                pair += 1
                flux_l = result["FLUX_%dA" % pca_lines[l]]
                flux_m = result["FLUX_%dA" % pca_lines[m]]
                if flux_m <= 0:
                    continue
                flux_ratios[pair] = flux_l / flux_m
                sig_f1 = result["FLUX_ERR_%dA" % pca_lines[l]]
                sig_f2 = result["FLUX_ERR_%dA" % pca_lines[m]]
                if sig_f1 <= 0 or sig_f2 <= 0:
                    continue  # ivar stays 0: this pair is ignored
                # error propagation for the ratio f1/f2
                ivar[pair] = (flux_m ** 2) / (sig_f1 ** 2 + (flux_ratios[pair]) ** 2 * sig_f2 ** 2)

        # scale according to pca mean flux
        a = np.sum(ivar * pca_mean_flux_ratios ** 2)
        if a == 0:
            log.warning("cannot compute pca prior for result #%d because null ivar" % res_index)
            continue
        scale = np.sum(ivar * flux_ratios * pca_mean_flux_ratios) / a
        if scale <= 0:
            log.warning("cannot compute pca prior for result #%d because scale=%f" % (res_index, scale))
            continue
        flux_ratios /= scale
        ivar *= scale ** 2

        residuals = flux_ratios - pca_mean_flux_ratios
        residuals[ivar == 0] = 0.0

        # weighted least-square fit of the PCA coefficients: A.coefs = B
        nc = pca_components.shape[0]
        A = np.zeros((nc, nc))
        B = np.zeros((nc))

        for i in range(nc):
            B[i] = np.sum(ivar * pca_components[i] * residuals)
            for j in range(nc):
                A[i, j] = np.sum(ivar * pca_components[i] * pca_components[j])
            # add weak prior to invert
            A[i, i] += 0.0001
        try:
            coefs, cov = cholesky_solve_and_invert(A, B)
            for i in range(nc):
                # gaussian prior term, plus penalties (using only the
                # measurement variance) when outside of the allowed range
                coef_chi2 = (
                    (coefs[i] - pca_mean_coef[i]) ** 2 / (cov[i, i] + pca_rms_coef[i] ** 2)
                    + (coefs[i] > pca_max_coef[i]) * (coefs[i] - pca_max_coef[i]) ** 2 / cov[i, i]
                    + (coefs[i] < pca_min_coef[i]) * (coefs[i] - pca_min_coef[i]) ** 2 / cov[i, i]
                )
                chi2[res_index] += coef_chi2
        except Exception:
            # do not catch KeyboardInterrupt/SystemExit
            log.warning("cholesky failed")
            return 1000.0

        log.debug("res #%d line_ratio_pca_prior chi2=%f" % (res_index, chi2[res_index]))

    return chi2
Пример #8
0
def chi2_of_line_ratio_pca_prior(list_of_results, lines, line_ratio_pca_prior):
    """Compute a chi2 penalty per result from a PCA prior on line fluxes.

    For each result, the fluxes of the prior lines are rescaled to best
    match the prior mean fluxes, and the residuals are projected on the PCA
    components.  The chi2 compares the fitted coefficients with the prior
    mean/rms and penalizes coefficients outside of [min_coef, max_coef].

    Args:
        list_of_results : sequence of dict-like results with entries
            "FLUX_%dA" and "FLUX_ERR_%dA" for each line of the prior
        lines : unused, kept for interface compatibility
        line_ratio_pca_prior : dict-like with entries "lines", "mean_flux",
            "components", "mean_coef", "rms_coef", "min_coef" and "max_coef"

    Returns:
        1D array with one chi2 per result (0 for results for which the prior
        cannot be evaluated; empty if list_of_results is empty).  NOTE: if
        the linear solve fails for any result, the scalar 1000.0 is returned
        instead of an array; this legacy behavior is kept for callers.
    """
    nres = len(list_of_results)
    chi2 = np.zeros((nres))
    log = get_logger()
    log.debug("starting")

    pca_lines = np.array(line_ratio_pca_prior["lines"])
    log.debug("lines of prior = %s" % pca_lines)

    pca_mean_flux = np.array(line_ratio_pca_prior["mean_flux"])
    pca_components = np.array(line_ratio_pca_prior["components"])
    pca_mean_coef = np.array(line_ratio_pca_prior["mean_coef"])
    pca_rms_coef = np.array(line_ratio_pca_prior["rms_coef"])
    pca_min_coef = np.array(line_ratio_pca_prior["min_coef"])
    pca_max_coef = np.array(line_ratio_pca_prior["max_coef"])

    for res_index, result in enumerate(list_of_results):
        # fluxes; lines without a valid error keep flux=0 and ivar=0
        flux = np.zeros((pca_lines.size))
        ivar = np.zeros((pca_lines.size))
        for i in range(pca_lines.size):
            err = result["FLUX_ERR_%dA" % pca_lines[i]]
            if err <= 0:
                continue
            flux[i] = result["FLUX_%dA" % pca_lines[i]]
            ivar[i] = 1.0 / err ** 2
        # scale according to pca mean flux
        a = np.sum(ivar * pca_mean_flux ** 2)
        if a == 0:
            log.warning("cannot compute pca prior for result #%d because null ivar" % res_index)
            continue
        scale = np.sum(ivar * flux * pca_mean_flux) / a
        if scale <= 0:
            log.warning("cannot compute pca prior for result #%d because scale=%f" % (res_index, scale))
            continue
        flux /= scale
        ivar *= scale ** 2
        residuals = flux - pca_mean_flux
        residuals[ivar == 0] = 0.0

        # weighted least-square fit of the PCA coefficients: A.coefs = B
        nc = pca_components.shape[0]
        A = np.zeros((nc, nc))
        B = np.zeros((nc))

        for i in range(nc):
            B[i] = np.sum(ivar * pca_components[i] * residuals)
            for j in range(nc):
                A[i, j] = np.sum(ivar * pca_components[i] * pca_components[j])
            # add weak prior to invert
            A[i, i] += 0.0001
        try:
            coefs, cov = cholesky_solve_and_invert(A, B)
            for i in range(nc):
                # gaussian prior term, plus penalties (using only the
                # measurement variance) when outside of the allowed range
                coef_chi2 = (
                    (coefs[i] - pca_mean_coef[i]) ** 2 / (cov[i, i] + pca_rms_coef[i] ** 2)
                    + (coefs[i] > pca_max_coef[i]) * (coefs[i] - pca_max_coef[i]) ** 2 / cov[i, i]
                    + (coefs[i] < pca_min_coef[i]) * (coefs[i] - pca_min_coef[i]) ** 2 / cov[i, i]
                )
                chi2[res_index] += coef_chi2
        except Exception:
            # do not catch KeyboardInterrupt/SystemExit
            log.warning("cholesky failed")
            return 1000.0

        log.debug("res #%d line_ratio_pca_prior chi2=%f" % (res_index, chi2[res_index]))

    return chi2
Пример #9
0
def zz_line_fit(
    wave,
    flux,
    ivar,
    resolution,
    lines,
    vdisp,
    line_ratio_priors,
    z,
    wave_range,
    x,
    gx,
    groups,
    fixed_line_ratio=None,
    z_for_range=None,
):
    """
    internal routine : fit line amplitudes and return delta chi2 with respect to zero amplitude

    Line amplitudes and one flat continuum level per group of lines are
    fitted simultaneously, as the solution of a linear system A*X = B
    accumulated over all frames.

    Args:
        wave : sequence of wavelength arrays, one per frame
        flux : sequence of flux arrays, one per frame
        ivar : sequence of inverse variance arrays, one per frame
        resolution : sequence of per-frame objects with `data` and `offsets`
            attributes, used to build a diagonal sparse matrix restricted
            to the fitted wavelengths
        lines : array of line wavelengths; they are multiplied by (1+z),
            so presumably rest-frame -- confirm with caller
        vdisp : velocity dispersion, divided by 2.9970e5 to get the line
            width (presumably km/s -- confirm with caller)
        line_ratio_priors : None, or sequence of
            (line_index, other_line_index, min_ratio, max_ratio, conserve_flux)
        z : redshift at which the lines are fitted
        wave_range : full width of the wavelength window around each line
        x, gx : tabulated line profile, evaluated with
            np.interp((wave-line)/sigma, x, gx)/sigma
        groups : mapping group_index -> array of indices (in `lines`) of the
            lines fitted together; group_index is also used as an integer
            offset to locate the continuum parameter of the group
        fixed_line_ratio : None, or sequence of (i, j, ratio) imposing
            amplitude[i] = ratio*amplitude[j] with a strong quadratic penalty
        z_for_range : None, or redshift used to define the wavelength
            windows instead of z

    Returns:
        dchi2 : chi2 difference with respect to zero amplitudes, plus the
            number of free parameters
        line_amplitudes : 1D array of size lines.size
        line_amplitudes_ivar : 1D array of size lines.size (0 = not measured)

        If the linear solve fails, returns (1e5, zeros, zeros).
    """
    log = get_logger()

    # show=False
    # z=0.393299 ; group_to_show=6 ; show=True
    # z=1.452752 ; group_to_show=0 ; show=True
    # z=1.334595
    # z=0.857396 ; show=True

    redshifted_lines = lines * (1 + z)
    redshifted_sigmas = lines * (vdisp / 2.9970e5) * (1 + z)  # this is the sigmas of all lines
    wave_hw = wave_range / 2.0
    nframes = len(flux)

    # compute profiles, and fill A and B matrices
    # A and B are matrix and vector so that line amplitudes and continuum (X) are solution of A*X = B
    # parameters are ordered as : all line amplitudes first, then continua
    nlines = lines.size
    npar = nlines + len(groups)  # one continuum parameter per group

    A = np.zeros((npar, npar))
    B = np.zeros((npar))

    # do it per group to account for overlapping lines
    for group_index in groups:
        lines_in_group = groups[group_index]
        if z_for_range is None:
            # to test if line is included:
            # NOTE(review): tl2 uses np.min whereas l2 below uses np.max;
            # this looks like it was meant to be np.max -- confirm
            tl1 = np.min(redshifted_lines[lines_in_group] - 1 * redshifted_sigmas[lines_in_group])
            tl2 = np.min(redshifted_lines[lines_in_group] + 1 * redshifted_sigmas[lines_in_group])
            # actual fit range (larger because we want to fit the continuum at the same time)
            l1 = np.min(redshifted_lines[lines_in_group] - wave_hw)
            l2 = np.max(redshifted_lines[lines_in_group] + wave_hw)
        else:  # needed in a refined fit to fix the wave range whatever the z
            # same windows, with the line positions moved to z_for_range
            # to test if line is included:
            tl1 = np.min(
                redshifted_lines[lines_in_group] * (1 + z_for_range) / (1 + z) - 1 * redshifted_sigmas[lines_in_group]
            )
            tl2 = np.min(
                redshifted_lines[lines_in_group] * (1 + z_for_range) / (1 + z) + 1 * redshifted_sigmas[lines_in_group]
            )
            # actual fit range (larger because we want to fit the continuum at the same time)
            l1 = np.min(redshifted_lines[lines_in_group] * (1 + z_for_range) / (1 + z) - wave_hw)
            l2 = np.max(redshifted_lines[lines_in_group] * (1 + z_for_range) / (1 + z) + wave_hw)

        nlines_in_group = lines_in_group.size

        for frame_index in range(nframes):

            frame_wave = wave[frame_index]
            frame_ivar = ivar[frame_index]
            # test : skip this frame if it has no valid pixel close to the lines
            if np.sum((frame_wave >= tl1) & (frame_wave <= tl2) & (frame_ivar > 0)) == 0:
                continue
            # wavelength that matter :
            wave_index = np.where((frame_wave >= l1) & (frame_wave <= l2) & (frame_ivar > 0))[0]
            if wave_index.size == 0:
                continue
            frame_wave = frame_wave[wave_index]
            frame_ivar = frame_ivar[wave_index]
            frame_flux = flux[frame_index][wave_index]

            # this is the block of the diagonal sparse matrix corresponding to the wavelength of interest :
            frame_res_for_group = scipy.sparse.dia_matrix(
                (resolution[frame_index].data[:, wave_index], resolution[frame_index].offsets),
                shape=(wave_index.size, wave_index.size),
            )

            # compute profiles (one row per line of the group)
            profile_of_lines = np.zeros((lines_in_group.size, frame_wave.size))
            for i, line_index, line, sig in zip(
                range(lines_in_group.size),
                lines_in_group,
                redshifted_lines[lines_in_group],
                redshifted_sigmas[lines_in_group],
            ):
                prof = np.interp((frame_wave - line) / sig, x, gx) / sig
                # convolve here with the spectrograph resolution
                profile_of_lines[i] = frame_res_for_group.dot(prof)

            # if show and group_index==group_to_show :
            #    print "DEBUGGING !!!"
            #    w=frame_wave
            #    f=frame_flux
            #    p0=profile_of_lines[0]
            #    if profile_of_lines.shape[0]>1 :
            #        p1=profile_of_lines[1]
            #    else :
            #        p1=None

            # fill amplitude system (A and B) :
            for i in range(nlines_in_group):
                B[lines_in_group[i]] += np.sum(frame_ivar * profile_of_lines[i] * frame_flux)
                for j in range(nlines_in_group):
                    A[lines_in_group[i], lines_in_group[j]] += np.sum(
                        frame_ivar * profile_of_lines[i] * profile_of_lines[j]
                    )

            # continuum part (constant term, so its "profile" is 1)
            cont_index = nlines + group_index
            B[cont_index] += np.sum(frame_ivar * frame_flux)
            A[cont_index, cont_index] += np.sum(frame_ivar)
            for i in range(nlines_in_group):
                tmp = np.sum(frame_ivar * profile_of_lines[i])
                A[lines_in_group[i], cont_index] += tmp
                A[cont_index, lines_in_group[i]] += tmp

    # A is kept untouched (it is needed for dchi2 below); the constraints
    # are added to a copy
    Atofit = A.copy()
    if fixed_line_ratio is not None:
        for fixed in fixed_line_ratio:
            i = fixed[0]
            j = fixed[1]
            ratio = fixed[2]

            # f0=ratio*f1
            # chi2 = (f0-ratio*f1)**2
            # i.e. add weight*(X[i]-ratio*X[j])**2 to the quadratic form
            weight = 100000.0

            Atofit[i, i] += weight
            Atofit[j, j] += weight * ratio ** 2

            Atofit[i, j] -= weight * ratio
            Atofit[j, i] -= weight * ratio

    # solving outside of group to simplify the code even if it is supposedly slower (the matrix is nearly diagonal)

    # give value to undefined lines (it's not a problem)
    # (a null diagonal term would make the matrix singular)
    for i in range(Atofit.shape[0]):
        if Atofit[i, i] == 0:
            Atofit[i, i] = 1

    # solve the system
    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit
    try:
        params, cov = cholesky_solve_and_invert(Atofit, B)
    except:
        log.warning("cholesky_solve failed")
        # print sys.exc_info()
        return 1e5, np.zeros((lines.size)), np.zeros((lines.size))

    line_amplitudes = np.zeros((lines.size))
    line_amplitudes_ivar = np.zeros((lines.size))

    # lines without data keep amplitude=0 and ivar=0
    for i in range(lines.size):
        if A[i, i] == 0:  # no data
            continue
        line_amplitudes[i] = params[i]
        line_amplitudes_ivar[i] = 1.0 / cov[i, i]

    # apply priors (outside of loop on groups)
    # this modifies line_amplitudes only, not params
    if line_ratio_priors is not None:
        for prior in line_ratio_priors:
            line_index = int(prior[0])
            other_line_index = int(prior[1])
            min_ratio = float(prior[2])
            max_ratio = float(prior[3])
            conserve_flux = prior[4]

            # first ignore if one of the lines is not measured
            if line_amplitudes_ivar[line_index] == 0 or line_amplitudes_ivar[other_line_index] == 0:
                continue

            # if two lines are negatives ignore this
            if line_amplitudes[line_index] <= 0 and line_amplitudes[other_line_index] <= 0:
                continue

            # the ratio prior is on flux(this_line)/flux(other_line)
            if conserve_flux:
                # redistribute the total flux between the two lines so that
                # the ratio is at the closest allowed bound

                total_flux = line_amplitudes[line_index] + line_amplitudes[other_line_index]

                if line_amplitudes[other_line_index] <= 0:
                    ratio = 10000.0
                else:
                    ratio = line_amplitudes[line_index] / line_amplitudes[other_line_index]

                if ratio > max_ratio:
                    line_amplitudes[line_index] = max_ratio / (1.0 + max_ratio) * total_flux
                    line_amplitudes[other_line_index] = 1.0 / (1.0 + max_ratio) * total_flux
                elif ratio < min_ratio:
                    line_amplitudes[line_index] = min_ratio / (1.0 + min_ratio) * total_flux
                    line_amplitudes[other_line_index] = 1.0 / (1.0 + min_ratio) * total_flux
            else:
                # apply ratio to line with lowest snr :
                # NOTE(review): apply_to_index is assigned but never read
                this_snr = line_amplitudes[line_index] * math.sqrt(line_amplitudes_ivar[line_index])
                other_snr = line_amplitudes[other_line_index] * math.sqrt(line_amplitudes_ivar[other_line_index])

                if this_snr < other_snr:
                    apply_to_index = line_index
                    ratio = line_amplitudes[line_index] / line_amplitudes[other_line_index]
                    if ratio < min_ratio:
                        line_amplitudes[line_index] = line_amplitudes[other_line_index] * min_ratio
                    elif ratio > max_ratio:
                        line_amplitudes[line_index] = line_amplitudes[other_line_index] * max_ratio
                else:
                    apply_to_index = other_line_index
                    # and need to invert ratio
                    # (the 0.001 term avoids a division by zero when min_ratio is 0)
                    min_ratio_tmp = 1.0 / (max_ratio)
                    max_ratio = 1.0 / (min_ratio + 0.001 * (min_ratio == 0))
                    min_ratio = min_ratio_tmp
                    ratio = line_amplitudes[other_line_index] / line_amplitudes[line_index]
                    if ratio < min_ratio:
                        line_amplitudes[other_line_index] = line_amplitudes[line_index] * min_ratio
                    elif ratio > max_ratio:
                        line_amplitudes[other_line_index] = line_amplitudes[line_index] * max_ratio

    # parameters (lines and continua) that are constrained by data
    number_of_free_params = np.sum(np.diag(A) > 0)

    # force non-negative here
    # this makes sure chi2 is same as without any signal
    # (the whole group, continuum included, is zeroed if any line is negative)
    for group_index in groups:
        lines_in_group = groups[group_index]
        if np.sum(line_amplitudes[lines_in_group] < 0) > 0:  # has neg. amplitude
            line_amplitudes[lines_in_group] = 0.0
            params[lines_in_group] = 0.0
            cont_index = nlines + group_index
            params[cont_index] = 0.0
            number_of_free_params -= np.sum(line_amplitudes_ivar[lines_in_group] > 0) + 1  # lines and continuum

    # add chi2 for this group
    """
    chi2 = sum w*(data - amp*prof)**2
    = sum w*data**2 + amp**2 sum w prof**2 - 2 * amp * sum w*data*prof
    = chi20 + amp^T A amp - 2 B^T amp
    
    min chi2 for amp = A^-1 B
    min chi2 = chi20 - B^T A^-1 B
    
    BUT, we may have changed the amplitudes with the prior !
    """

    # apply delta chi2
    # evaluated with params and the unconstrained matrix A
    dchi2 = A.dot(params).T.dot(params) - 2 * np.inner(B, params)
    # add number of free params to allow chi2 comparison
    dchi2 += number_of_free_params

    # debugging
    # if np.sum(line_amplitudes)==0 and dchi2 != 0 :
    #     print "THIS IS BIZARRE"
    #     print line_amplitudes
    #     print line_amplitudes_ivar
    #     print params
    #     print number_of_free_params
    #     sys.exit(12)

    """
    if show :
            import pylab
            continuum=params[nlines:]
            pylab.plot(w,f)
            pylab.plot(w,continuum[group_to_show]*np.ones((w.size)))
            pylab.plot(w,continuum[group_to_show]+line_amplitudes[groups[group_to_show][0]]*p0)
            #dchi2 = A.dot(params).T.dot(params) - 2*np.inner(B,params)
            print "dchi2=",dchi2
            if group_to_show==0 :
                pylab.plot(w,continuum[0]+line_amplitudes[1]*p1)
                pylab.plot(w,continuum[0]+line_amplitudes[0]*p0+line_amplitudes[1]*p1)
                print "DEBUGGING!!"
                print "oIIflux=",line_amplitudes[0]+line_amplitudes[1],"ratio=",line_amplitudes[0]/line_amplitudes[1]
                print "integral=",np.sum(np.gradient(w)*(line_amplitudes[0]*p0+line_amplitudes[1]*p1))
            pylab.show()
    """

    return dchi2, line_amplitudes, line_amplitudes_ivar