def test_single_gauss_1d_varunc_logweights():
    """Single 1D Gaussian with per-point variances and log-space weights:
    downweighting the oversampled positive side should restore the zero
    mean and unit variance of the underlying distribution."""
    ndata = 3001
    ydata = numpy.random.normal(size=ndata)[:, numpy.newaxis]
    # twice oversample > 0
    tail = numpy.arange(3001) > 2000
    ydata[tail] = numpy.fabs(ydata[tail])
    weight = numpy.ones(ndata)
    weight[ydata[:, 0] > 0] = 0.5
    ycovar = (numpy.ones_like(ydata)
              * numpy.random.uniform(size=ndata)[:, numpy.newaxis])
    ydata += (numpy.random.normal(size=ndata)[:, numpy.newaxis]
              * numpy.sqrt(ycovar))
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD, passing the weights in log space
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          weight=numpy.log(weight), logweight=True)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    return None
# Example #2
def test_single_gauss_2d_nounc():
    """Single 2D Gaussian with no measurement uncertainties: XD should
    recover the true mean (1, 2) and unit, uncorrelated variances."""
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    ycovar = numpy.zeros_like(ydata)
    # initialize fit away from the truth
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(
        numpy.mean(ydata, axis=0) + numpy.std(ydata, axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # Run XD
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    msg_mean = 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    msg_var = 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, msg_mean
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, msg_mean
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, msg_var
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, msg_var
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, msg_var
    return None
# Example #3
def XDapogee(options, args):
    """Fit an XD Gaussian mixture to APOGEE (log vc, dvcdr) MCMC samples.

    Parameters
    ----------
    options : object with attributes `g` (number of Gaussian components)
        and `plotfile` (output path for the pickled fit).
    args : sequence whose first element is the path to a pickle file of
        MCMC samples; sample index 0 is vc, index 6 is the slope.

    Side effects: prints summary statistics and the fit, and pickles
    (amp, mean, covar) to options.plotfile.
    """
    # First load the chains; use a context manager so the file is always closed
    with open(args[0], 'rb') as savefile:
        thesesamples = pickle.load(savefile)
    vcs = numpy.array([s[0] for s in thesesamples]) * _APOGEEREFV0 / _REFV0
    dvcdrs = numpy.array([s[6] for s in thesesamples]) * 30.  # To be consistent with this project's dlnvcdlnr
    # print statements converted to print() calls for Python 3 compatibility
    print(numpy.mean(vcs))
    print(numpy.mean(dvcdrs))
    # Now fit XD to the 2D PDFs (zero observational uncertainties)
    ydata = numpy.zeros((len(vcs), 2))
    ycovar = numpy.zeros((len(vcs), 2))
    ydata[:, 0] = numpy.log(vcs)
    ydata[:, 1] = dvcdrs
    vcxamp = numpy.ones(options.g) / options.g
    vcxmean = numpy.zeros((options.g, 2))
    vcxcovar = numpy.zeros((options.g, 2, 2))
    for ii in range(options.g):
        # random small offsets so the components start in different places
        vcxmean[ii, :] = numpy.mean(ydata, axis=0) \
            + numpy.std(ydata, axis=0) * numpy.random.normal(size=(2)) / 4.
        vcxcovar[ii, 0, 0] = numpy.var(ydata[:, 0])
        vcxcovar[ii, 1, 1] = numpy.var(ydata[:, 1])
    extreme_deconvolution.extreme_deconvolution(ydata, ycovar,
                                                vcxamp, vcxmean, vcxcovar)
    save_pickles(options.plotfile,
                 vcxamp, vcxmean, vcxcovar)
    print(vcxamp)
    print(vcxmean[:, 0])
    print(vcxmean[:, 1])
    return None
# Example #4
def test_single_gauss_2d_offdiagunc():
    """Single 2D Gaussian whose per-point (diagonal) variances are handed
    to XD as full 2x2 covariance matrices: XD should recover the mean
    (1, 2) and unit, uncorrelated variances."""
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    tycovar = numpy.ones_like(ydata) * numpy.random.uniform(size=(ndata, 2)) / 2.
    ydata += numpy.random.normal(size=(ndata, 2)) * numpy.sqrt(tycovar)
    # Store the per-point variances as full 2x2 covariance matrices
    ycovar = numpy.zeros((ndata, 2, 2))
    ycovar[:, 0, 0] = tycovar[:, 0]
    ycovar[:, 1, 1] = tycovar[:, 1]
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(
        numpy.mean(ydata, axis=0) + numpy.std(ydata, axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # Run XD
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    msg_mean = 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    msg_var = 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, msg_mean
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, msg_mean
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, msg_var
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, msg_var
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, msg_var
    return None
# Example #5
def test_fixmean_single_gauss_1d_nounc():
    """Single 1D Gaussian, no uncertainties, mean held fixed at the true
    value: XD should leave the mean untouched and recover unit variance."""
    ndata = 3001
    ydata = numpy.random.normal(size=ndata)[:, numpy.newaxis] + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit; the mean starts (and must stay) at the truth
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.array([[1.]])
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD with the mean fixed
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixmean=True)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 1.) < 10.**-10., \
        'XD did not fixmean for single Gaussian'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/o uncertainties, fixing mean'
    return None
# Example #6
def test_single_gauss_2d_diagunc_proj():
    """Single 2D Gaussian observed through random 1D projections (x, y, or
    x+y) with per-point noise variances: XD should recover the mean (1, 2)
    and unit, uncorrelated variances."""
    ndata = 3001
    tydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    # Randomly project each point onto x, y, or x+y
    ydata = numpy.zeros((ndata, 1))
    proj_x = numpy.random.binomial(2, 0.5, ndata)
    ydata[proj_x == 0, 0] = tydata[proj_x == 0, 0]
    ydata[proj_x == 1, 0] = tydata[proj_x == 1, 1]
    ydata[proj_x == 2, 0] = tydata[proj_x == 2, 0] + tydata[proj_x == 2, 1]
    # Matching projection matrices for each draw
    projection = numpy.empty((ndata, 1, 2))
    projection[proj_x == 0] = numpy.array([[[1., 0.]]])
    projection[proj_x == 1] = numpy.array([[[0., 1.]]])
    projection[proj_x == 2] = numpy.array([[[1., 1.]]])
    ycovar = (numpy.ones_like(ydata)
              * numpy.random.uniform(size=ndata)[:, numpy.newaxis])
    ydata += (numpy.random.normal(size=ndata)[:, numpy.newaxis]
              * numpy.sqrt(ycovar))
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.array([[0., 1.]])
    initcovar = numpy.array([[[2., -1.], [-1., 3.]]])
    # Run XD with the projection matrices
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          projection=projection)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    msg_mean = 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    msg_var = 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, msg_mean
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, msg_mean
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, msg_var
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, msg_var
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, msg_var
    return None
# Example #7
def test_fixamp_alt_dual_gauss_1d_nounc():
    """Two 1D Gaussians, no uncertainties, amplitudes fixed via a
    per-component boolean list (equivalent to fixamp=True): XD should keep
    the amplitudes and recover the means and variances."""
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit
    K = 2
    initamp = numpy.array([amp_true, 1. - amp_true])
    initmean = numpy.array([[-1.], [2.]])
    initcovar = numpy.zeros((K, 1, 1))
    numpy.random.uniform()  # hack to get diff init
    initcovar[:] = numpy.mean(3. * numpy.var(ydata))
    # Run XD
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixamp=[True, False])  # should be same as =True
    # Test
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    return None
# Example #8
def test_single_gauss_1d_varunc_log_loglikeonly():
    """Fit a single 1D Gaussian with per-point variances while logging, then
    recompute the log-likelihood with likeonly=True and check that it matches
    the last value recorded in the loglike logfile."""
    ndata = 3001
    ydata = numpy.random.normal(size=ndata)[:, numpy.newaxis]
    ycovar = (numpy.ones_like(ydata)
              * numpy.random.uniform(size=ndata)[:, numpy.newaxis])
    ydata += (numpy.random.normal(size=ndata)[:, numpy.newaxis]
              * numpy.sqrt(ycovar))
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD with logging enabled
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now compute the likelihood and check that it is the same as in the logfile
    lnl = extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                                likeonly=True)
    with open(logfile + '_loglike.log') as log:
        lines = log.readlines()
        assert numpy.fabs(float(lines[-3]) - lnl) < 10.**-6., \
            'loglike computed using likeonly is not the same as in the logfile'
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
# Example #9
def test_fixcovar_dual_gauss_1d_nounc():
    """Two 1D Gaussians, no uncertainties, covariances fixed at their true
    values: XD should keep them and recover the amplitudes and means."""
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit; start the covariances at their true values
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [2.]])
    initcovar = numpy.zeros((K, 1, 1))
    initcovar[0] = 1.
    initcovar[1] = 4.
    # Run XD with the covariances fixed
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixcovar=True)
    # Test
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing covar'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover mean for dual Gaussian w/o uncertainties, fixing covar'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing mean'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing covar'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover mean for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 10.**-10., \
        'XD did not fixcovar for dual Gaussian w/o uncertainties'
    return None
# Example #10
def test_single_gauss_1d_varunc_log():
    """Fit a single 1D Gaussian with per-point variances while logging is
    enabled; check the fit and that both logfiles exist and are non-empty.

    Fix: the line counts previously used bare open() calls inside generator
    expressions, leaking the file handles (closed only when the GC ran);
    they now use context managers.
    """
    # Same as in test_oned, but now also log
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    ycovar = numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata)+numpy.std(ydata))
    initcovar = numpy.atleast_3d(3.*numpy.var(ydata))
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10./numpy.sqrt(ndata)
    assert numpy.fabs(initmean-0.) < tol, 'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar-1.) < tol, 'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile+'_c.log'), 'XD did not produce _c.log logfile when asked'
    with open(logfile+'_c.log') as logf:  # close the handle deterministically
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile+'_loglike.log'), 'XD did not produce _loglike.log logfile when asked'
    with open(logfile+'_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile+'_c.log')
    os.remove(logfile+'_loglike.log')
    return None
# Example #11
def test_dual_gauss_1d_constunc():
    """Two 1D Gaussians with a constant measurement variance of 0.25:
    XD should recover both components' amplitudes, means, and variances."""
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.ones_like(ydata) * 0.25
    ydata += (numpy.atleast_2d(numpy.random.normal(size=ndata)).T
              * numpy.sqrt(ycovar))
    # initialize fit
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.]])
    initcovar = numpy.zeros((K, 1, 1))
    initcovar[:] = numpy.mean(3. * numpy.var(ydata))
    # Run XD
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # Test
    tol = 20. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover correct amp for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/ constant uncertainties'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover correct amp for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/ constant uncertainties'
    return None
# Example #12
def test_single_gauss_2d_diagunc_logfile():
    """Fit a single 2D Gaussian with diagonal uncertainties while logging;
    check the fit and that both logfiles exist and are non-empty.

    Fix: the line counts previously used bare open() calls inside generator
    expressions, leaking the file handles; they now use context managers.
    """
    # Generate data from a single Gaussian, recover mean and variance
    # Also log
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata,2))+numpy.array([[1.,2.]])
    ycovar = numpy.ones_like(ydata)\
        *numpy.random.uniform(size=(ndata,2))/2.
    ydata += numpy.random.normal(size=(ndata,2))*numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata,axis=0)\
                                   +numpy.std(ydata,axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata,rowvar=False)).T
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that the fit worked
    tol = 10./numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0,0]-1.) < tol, 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0,1]-2.) < tol, 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0,0,0]-1.) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0,1,1]-1.) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0,0,1]-0.) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile+'_c.log'), 'XD did not produce _c.log logfile when asked'
    with open(logfile+'_c.log') as logf:  # close the handle deterministically
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile+'_loglike.log'), 'XD did not produce _loglike.log logfile when asked'
    with open(logfile+'_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile+'_c.log')
    os.remove(logfile+'_loglike.log')
    return None
# Example #13
def test_single_gauss_1d_varunc_logweights():
    """Single 1D Gaussian with per-point variances and weights given in log
    space: downweighting the oversampled positive side should restore zero
    mean and unit variance."""
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    # twice oversample > 0
    oversampled = numpy.arange(3001) > 2000
    ydata[oversampled] = numpy.fabs(ydata[oversampled])
    weight = numpy.ones(ndata)
    weight[ydata[:, 0] > 0] = 0.5
    ycovar = (numpy.ones_like(ydata)
              * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T)
    ydata += (numpy.atleast_2d(numpy.random.normal(size=ndata)).T
              * numpy.sqrt(ycovar))
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD, passing the weights in log space
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          weight=numpy.log(weight), logweight=True)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    return None
def test_triple_gauss_1d_varunc_alsow():
    """Three 1D Gaussians with per-point variances, fit with a small
    regularization w=0.1 (which shouldn't matter much): XD should recover
    all three components' amplitudes, means, and variances."""
    ndata = 3001
    amp_true = [0.3, 0.1, 0.6]
    assign = numpy.random.choice(numpy.arange(3), p=amp_true, size=ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 4.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ydata[assign == 2, 0] = numpy.random.normal(size=numpy.sum(assign == 2)) * 1.5 + 8.
    ycovar = (numpy.ones_like(ydata)
              * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T)
    ydata += (numpy.atleast_2d(numpy.random.normal(size=ndata)).T
              * numpy.sqrt(ycovar))
    # initialize fit
    K = 3
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.], [1.]])
    initcovar = numpy.zeros((K, 1, 1))
    initcovar[:] = numpy.mean(3. * numpy.var(ydata))
    # Run XD, w shouldn't make much difference
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar, w=0.1)
    # Test
    tol = 25. / numpy.sqrt(ndata)
    msg_amp = 'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    msg_mean = 'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    msg_var = 'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    msg_amp2 = 'XD does not recover correct amp for triple Gaussian w/  uncertainties'
    msg_mean2 = 'XD does not recover correct mean for triple Gaussian w/  uncertainties'
    msg_var2 = 'XD does not recover correct variance for triple Gaussian w/  uncertainties'
    first = initamp > 0.5
    assert numpy.fabs(initamp[first] - amp_true[2]) < tol, msg_amp
    assert numpy.fabs(initmean[first] - 8.) < tol, msg_mean
    assert numpy.fabs(initcovar[first] - 1.5**2.) < tol, msg_var
    second = (initamp <= 0.5) * (initamp > 0.2)
    assert numpy.fabs(initamp[second] - amp_true[0]) < tol, msg_amp2
    assert numpy.fabs(initmean[second] - -4.) < 2. * tol, msg_mean2
    assert numpy.fabs(initcovar[second] - 1.) < 2. * tol, msg_var2
    third = (initamp <= 0.2)
    assert numpy.fabs(initamp[third] - amp_true[1]) < tol, msg_amp2
    assert numpy.fabs(initmean[third] - 1.) < 4. * tol, msg_mean2
    assert numpy.fabs(initcovar[third] - 4.) < 4. * tol, msg_var2
    return None
# Example #15
def fit_gaia_baseline(datafile, output_prefix, K, epochs, w_reg,
                      k_means_iters):
    """Fit a K-component XD baseline model to the Gaia training set.

    Initializes component weights/means with minibatch k-means, runs
    extreme_deconvolution on (X_train, C_train), scores train and validation
    sets, and writes a results JSON plus the fitted parameters.

    Parameters
    ----------
    datafile : path to an .npz with X_train/C_train/X_val/C_val arrays
    output_prefix : path prefix for the log, results, and parameter files
    K : number of mixture components
    epochs : maximum number of XD iterations
    w_reg : covariance regularization parameter passed to XD
    k_means_iters : currently unused; k-means runs a fixed 10 iterations
        (NOTE(review): possibly intended as max_iters below — confirm)

    Fixes: the results JSON file is now closed via a context manager (it was
    previously left open and possibly unflushed), and np.savez uses
    str(output_prefix) for consistency with the other uses (output_prefix
    may be a pathlib.Path, and Path + str would raise).
    """
    data = np.load(datafile)

    train_data = SGDDeconvDataset(torch.Tensor(data['X_train']),
                                  torch.Tensor(data['C_train']))

    loader = data_utils.DataLoader(train_data,
                                   batch_size=5000,
                                   num_workers=4,
                                   shuffle=True)

    start_time = time.time()
    counts, centroids = minibatch_k_means(loader, k=K, max_iters=10)

    # Normalized cluster occupancies and centroids seed the mixture
    weights = (counts / counts.sum()).numpy()
    means = centroids.numpy()
    covars = np.array(K * [np.eye(7)])

    ll = extreme_deconvolution(data['X_train'],
                               data['C_train'],
                               weights,
                               means,
                               covars,
                               w=w_reg,
                               logfile=str(output_prefix) + '_log',
                               maxiter=epochs)

    end_time = time.time()

    # Scale the returned (presumably per-point) log-likelihood to a total
    train_score = ll * data['X_train'].shape[0]

    val_ll = extreme_deconvolution(data['X_val'],
                                   data['C_val'],
                                   weights,
                                   means,
                                   covars,
                                   w=w_reg,
                                   likeonly=True)
    val_score = val_ll * data['X_val'].shape[0]

    print('Training score: {}'.format(train_score))
    print('Val score: {}'.format(val_score))

    results = {
        'start_time': start_time,
        'end_time': end_time,
        'train_score': train_score,
        'val_score': val_score,
    }

    with open(str(output_prefix) + '_results.json', mode='w') as results_file:
        json.dump(results, results_file)
    np.savez(str(output_prefix) + '_params.npz',
             weights=weights,
             means=means,
             covar=covars)
# Example #16
def _xdFit(X, XErr, nGauss, n_iter=10):
    """Warm-start from a plain GMM fit, refine with extreme deconvolution
    using the measurement errors XErr, and wrap the result in an XDGMM."""
    warmup = GMM(nGauss, n_iter=n_iter, covariance_type='full').fit(X)
    amp = warmup.weights_
    mean = warmup.means_
    covar = warmup.covars_
    # extreme_deconvolution updates amp/mean/covar in place
    xd.extreme_deconvolution(X, XErr, amp, mean, covar)
    clf = XDGMM(nGauss)
    clf.alpha = amp
    clf.mu = mean
    clf.V = covar
    return clf
# Example #17
def test_single_gauss_2d_diagunc_logfile():
    """Fit a single 2D Gaussian with diagonal uncertainties while logging;
    check the fit and that both logfiles exist and are non-empty.

    Fix: the line counts previously used bare open() calls inside generator
    expressions, leaking the file handles; they now use context managers.
    """
    # Generate data from a single Gaussian, recover mean and variance
    # Also log
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    ycovar = numpy.ones_like(ydata)\
        *numpy.random.uniform(size=(ndata,2))/2.
    ydata += numpy.random.normal(size=(ndata, 2)) * numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata,axis=0)\
                                   +numpy.std(ydata,axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata,
                          ycovar,
                          initamp,
                          initmean,
                          initcovar,
                          logfile=logfile)
    # First test that the fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(
        initmean[0, 0] - 1.
    ) < tol, 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initmean[0, 1] - 2.
    ) < tol, 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initcovar[0, 0, 0] - 1.
    ) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initcovar[0, 1, 1] - 1.
    ) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initcovar[0, 0, 1] - 0.
    ) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(
        logfile + '_c.log'), 'XD did not produce _c.log logfile when asked'
    with open(logfile + '_c.log') as logf:  # close the handle deterministically
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(
        logfile +
        '_loglike.log'), 'XD did not produce _loglike.log logfile when asked'
    with open(logfile + '_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
# Example #18
def test_fixmean_fixone_dual_gauss_1d_nounc():
    """Two 1D Gaussians, no uncertainties, with only the first component's
    mean held fixed: XD should keep that mean and recover everything else."""
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit; first mean starts at the truth, second slightly off
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-2.], [1.5]])
    initcovar = numpy.zeros((K, 1, 1))
    initcovar[:] = numpy.mean(3. * numpy.var(ydata))
    # Run XD, fixing only the first component's mean
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixmean=[True, False])
    # Test
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing one mean'
    assert numpy.fabs(initmean[first] - -2.) < 10.**-10., \
        'XD did not fixmean for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing one mean'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing one mean'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover mean  for dual Gaussian w/o uncertainties, fixing one mean'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing one mean'
    return None
# Example #19
def XD_ND_Ncomp(data, covar, n_components:int=3, init_guess='default', print_init=False, plot=True):
    """Fit an n_components-Gaussian extreme-deconvolution model to data.

    Input:
    data: (ndata, ndim)
    covar: (ndata, ndim, ndim)

    n_components: number of components to fit
    init_guess: 'default' to initialize from a GMM fit, or a
        (initmean, initcovar) tuple for a manual initialization
    print_init: if True, print the initial parameters and array shapes
    plot: if True, scatter-plot the data and the fitted component means

    Returns (initamp, initmean, initcovar), updated in place by XD.
    """

    ### initialize fit with GMM
    K = n_components
    initamp = np.ones(K) / float(K)
    if init_guess == 'default':
        initmean, initcovar = initial_guess_from_GMM(data, n_components)
    else:
        print('manual init')
        initmean, initcovar = init_guess

    if print_init:
        print('initial')
        print('initamp: ', initamp)
        print('initmean: ', initmean)
        print('initcovar: ', initcovar)
        print()

        # BUG FIX: these previously referenced undefined names ydata/ycovar,
        # raising NameError whenever print_init=True; the function's
        # parameters are named data/covar
        print('ydata.shape: ', data.shape)
        print('ycovar.shape: ', covar.shape)
        print('initamp.shape: ', initamp.shape)
        print('initmean.shape: ', initmean.shape)
        print('initcovar.shape: ', initcovar.shape)
        print()

    # Running XD
    extreme_deconvolution(data, covar, initamp, initmean, initcovar, maxsnm=True)

    print('XD - fit')
    print('amp: ', initamp)
    print('mean: ', initmean)
    print('cov:', initcovar)

    if plot:
        # Plotting the results
        plt.scatter(data[:, 0], data[:, 1])

        for comp in initmean:
            plt.scatter(*comp, c='r')

    return initamp, initmean, initcovar
# Example #20
def run_xd(dafe):
    """Run XD on the delta afes"""
    npts = len(dafe)
    ydata = numpy.empty((npts, 1))
    ycovar = numpy.zeros((npts, 1))
    ydata[:, 0] = dafe
    # Two components: one free around zero, one fixed at dafe = -0.12
    xamp = numpy.array([0.5, 0.5])
    xmean = numpy.array([[0.], [-0.12]])
    xcovar = numpy.array([[[0.07]], [[0.07]]])
    extreme_deconvolution(ydata, ycovar, xamp, xmean, xcovar,
                          fixmean=[False, True])
    return (xamp, xmean, xcovar)
# Example #21
def test_fixamp_alt2_dual_gauss_1d_nounc():
    """Two 1D Gaussians, no uncertainties, amplitudes fixed via an index
    list (fixamp=[1], equivalent to fixamp=True): XD should keep the
    amplitudes and recover the means and variances."""
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit
    K = 2
    initamp = numpy.array([amp_true, 1. - amp_true])
    initmean = numpy.array([[-1.], [2.]])
    initcovar = numpy.zeros((K, 1, 1))
    numpy.random.uniform()  # hack to get diff init
    initcovar[:] = numpy.mean(3. * numpy.var(ydata))
    # Run XD
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixamp=[1])  # should be same as =True
    # Test
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    return None
# Example #22
def test_triple_gauss_1d_varunc_snm_log():
    """Split-and-merge fit of three Gaussians w/ uncertainties, with logging.

    Like test_triple_gauss_1d_varunc_snm, but also writes the XD logfiles
    and checks that they exist and are non-empty.  True components:
    amp_true=[0.1,0.3,0.6] drawing from N(-4,1), N(1,4) and N(8,1.5^2).
    """
    ndata= 3001
    amp_true= [0.1,0.3,0.6]
    assign= numpy.random.choice(numpy.arange(3),p=amp_true,size=ndata)
    ydata= numpy.zeros((ndata,1))
    ydata[assign==0,0]= numpy.random.normal(size=numpy.sum(assign==0))-4.
    ydata[assign==1,0]= numpy.random.normal(size=numpy.sum(assign==1))*2.+1.
    ydata[assign==2,0]= numpy.random.normal(size=numpy.sum(assign==2))*1.5+8.
    # Heteroscedastic uncertainties; perturb the data accordingly
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit
    K= 3
    initamp= numpy.ones(K)/float(K)
    initmean= numpy.array([[-1.],[0.],[1.]])
    initcovar= numpy.zeros((K,1,1))
    for kk in range(K):
        initcovar[kk]= numpy.mean(3.*numpy.var(ydata))
    # Run XD with split-and-merge, writing the logfiles
    logfile= 'test_log'
    extreme_deconvolution(ydata,ycovar,initamp,initmean,initcovar,
                          maxsnm=True,logfile=logfile)
    # Test: identify the components by their fitted amplitudes
    tol= 25./numpy.sqrt(ndata)
    first= initamp > 0.5  # the amp=0.6 component: N(8,1.5^2)
    assert numpy.fabs(initamp[first]-amp_true[2]) < tol, 'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[first]-8.) < tol, 'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[first]-1.5**2.) < tol, 'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    second= (initamp <= 0.5)*(initamp > 0.2)  # the amp=0.3 component: N(1,4)
    # BUG FIX: this component's true amplitude is amp_true[1]=0.3 (was amp_true[0];
    # the assertion only passed because tol ~ 0.46 swallowed the 0.2 offset)
    assert numpy.fabs(initamp[second]-amp_true[1]) < tol, 'XD does not recover correct amp for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initmean[second]-1.) < 4.*tol, 'XD does not recover correct mean for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initcovar[second]-4.) < 4.*tol, 'XD does not recover correct variance for triple Gaussian w/  uncertainties'
    third= (initamp <= 0.2)  # the amp=0.1 component: N(-4,1)
    # BUG FIX: this component's true amplitude is amp_true[0]=0.1 (was amp_true[1])
    assert numpy.fabs(initamp[third]-amp_true[0]) < tol, 'XD does not recover correct amp for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initmean[third]--4.) < 2.*tol, 'XD does not recover correct mean for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initcovar[third]-1.) < 2.*tol, 'XD does not recover correct variance for triple Gaussian w/  uncertainties'
    # Now test that the logfiles exist and are non-empty
    # (with-blocks close the handles; the originals leaked them)
    assert os.path.exists(logfile+'_c.log'), 'XD did not produce _c.log logfile when asked'
    with open(logfile+'_c.log') as logf:
        num_lines= sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile+'_loglike.log'), 'XD did not produce _loglike.log logfile when asked'
    with open(logfile+'_loglike.log') as logf:
        num_lines= sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile+'_c.log')
    os.remove(logfile+'_loglike.log')
    return None
Пример #23
0
def test_single_gauss_2d_diagunc_proj():
    # Recover a single 2D Gaussian (mean [1,2], unit covariance) from noisy
    # 1D projections of the data.
    ndata = 3001
    tydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    # Randomly project each point onto x, y, or x+y
    ydata = numpy.zeros((ndata, 1))
    proj_x = numpy.random.binomial(2, 0.5, ndata)
    ydata[proj_x == 0, 0] = tydata[proj_x == 0, 0]
    ydata[proj_x == 1, 0] = tydata[proj_x == 1, 1]
    ydata[proj_x == 2, 0] = tydata[proj_x == 2, 0] + tydata[proj_x == 2, 1]
    # Projection matrices matching the three cases above
    projection = numpy.empty((ndata, 1, 2))
    projection[proj_x == 0] = numpy.array([[[1., 0.]]])
    projection[proj_x == 1] = numpy.array([[[0., 1.]]])
    projection[proj_x == 2] = numpy.array([[[1., 1.]]])
    # Heteroscedastic 1D observational variances; perturb the projected data
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit away from the truth
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.array([[0., 1.]])
    initcovar = numpy.array([[[2., -1.], [-1., 3.]]])
    # Run XD
    extreme_deconvolution(ydata,
                          ycovar,
                          initamp,
                          initmean,
                          initcovar,
                          projection=projection)
    # Test
    # NOTE(review): the failure messages say "w/o uncertainties" but this
    # test does include uncertainties; message text left unchanged here.
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(
        initmean[0, 0] - 1.
    ) < tol, 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initmean[0, 1] - 2.
    ) < tol, 'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initcovar[0, 0, 0] - 1.
    ) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initcovar[0, 1, 1] - 1.
    ) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(
        initcovar[0, 0, 1] - 0.
    ) < tol, 'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    return None
Пример #24
0
def score_baseline(datafile, results_dir, output_file):
    """Score saved baseline GMM fits on held-out test data.

    Loads every ``baseline_512*.npz`` parameter file from ``results_dir``,
    evaluates the XD log-likelihood of (X_test, C_test) from ``datafile``
    under each fit (``likeonly=True`` evaluates without refitting), prints
    a summary, and dumps the individual scores to ``output_file`` as JSON.
    """
    data = np.load(datafile)

    param_files = [
        f for f in os.listdir(results_dir)
        if f.startswith('baseline_512') and f.endswith('.npz')
    ]

    scores = []
    for p in param_files:
        # os.path.join is robust to results_dir lacking a trailing separator
        # (the old string concatenation silently built a wrong path)
        params = np.load(os.path.join(results_dir, p))
        weights = params['weights']
        means = params['means']
        covars = params['covar']

        test_score = extreme_deconvolution(data['X_test'],
                                           data['C_test'],
                                           weights,
                                           means,
                                           covars,
                                           likeonly=True)
        print(test_score)
        # plain float so json.dump never chokes on a numpy scalar
        scores.append(float(test_score))

    print('Test Score: {} +- {}'.format(np.mean(scores), np.std(scores)))

    # close the output file deterministically (was a leaked handle)
    with open(output_file, 'w') as fout:
        json.dump(scores, fout)
Пример #25
0
def test1_ngerrors():
    """Scratch test of XD with per-datum Gaussian-mixture ('ng') errors in 2D.

    NOTE(review): the second (xamp, xmean, xcovar) assignment is active
    because the triple-quote toggle is commented out; this mirrors the
    original scratch-test behavior.
    """
    # Generate data
    ndata= 10001
    ngauss= 1
    ydata= numpy.random.normal(scale=[1.,2.],size=(ndata,2))*numpy.sqrt(2.)
    ycovar= numpy.ones((ndata,2))*0.
    ngamp= numpy.ones((ndata,ngauss))/ngauss
    ngmean= numpy.zeros((ndata,ngauss,2))
    ngcovar= numpy.ones((ndata,ngauss,2))
    xamp= numpy.ones(1)/1.
    xmean= numpy.array([[0.,0.]])
    xcovar= numpy.array([[[ 0.03821028, 0.02108113],
                          [ 0.02108113,  0.03173839]]])
#    """
    xamp= numpy.ones(2)/2.
    xmean= numpy.array([[0.,0.],[1.,-1.]])
    xcovar= numpy.array([[[ 0.03821028, 0.02108113],
                          [ 0.02014796,  0.03173839]],
                         [[ 0.06219194,  0.02302473],
                          [ 0.02738021,  0.06778009]]])
#    """
    l= extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar,
                             ng=True,
                             ngamp=ngamp,
                             ngmean=ngmean,
                             ngcovar=ngcovar)
    # BUG FIX: Python-3 print functions (were Python-2 print statements)
    print(l)
    print(xamp, xmean, xcovar)
Пример #26
0
def test_single_gauss_2d_offdiagunc_proj():
    """Recover a single 2D Gaussian from partially sheared, noisy data.

    Half the points are observed as (x+y, y) via a shear projection matrix;
    the rest are unprojected.  Diagonal heteroscedastic uncertainties are
    added, and XD must still recover mean [1,2] and unit covariance.
    """
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    # For a random half of the points, observe x+y instead of x
    proj = numpy.random.uniform(size=ndata) > 0.5
    ydata[proj, 0] += ydata[proj, 1]
    projection = numpy.empty((ndata, 2, 2))
    projection[proj] = numpy.array([[1., 1.], [0., 1.]])
    projection[~proj] = numpy.array([[1., 0.], [0., 1.]])
    # Heteroscedastic per-dimension variances; perturb the data accordingly
    tycovar = numpy.ones_like(ydata) * numpy.random.uniform(size=(ndata, 2)) / 2.
    ydata += numpy.random.normal(size=(ndata, 2)) * numpy.sqrt(tycovar)
    # Expand the per-dimension variances into full 2x2 covariance matrices
    ycovar = numpy.empty((ndata, 2, 2))
    for idx in range(ndata):
        ycovar[idx] = numpy.diag(tycovar[idx])
    # initialize fit away from the truth
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata, axis=0)
                                + numpy.std(ydata, axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # Run XD
    extreme_deconvolution(ydata,
                          ycovar,
                          initamp,
                          initmean,
                          initcovar,
                          projection=projection)
    # Check every recovered moment against the truth
    tol = 10. / numpy.sqrt(ndata)
    checks = [(initmean[0, 0], 1., 'mean'),
              (initmean[0, 1], 2., 'mean'),
              (initcovar[0, 0, 0], 1., 'variance'),
              (initcovar[0, 1, 1], 1., 'variance'),
              (initcovar[0, 0, 1], 0., 'variance')]
    for value, truth, what in checks:
        assert numpy.fabs(value - truth) < tol, \
            'XD does not recover correct %s for single Gaussian in 2D w/o uncertainties' % what
    return None
Пример #27
0
def test_single_gauss_1d_nounc():
    """Recover mean and variance of a single 1D Gaussian without errors."""
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    ycovar = numpy.zeros_like(ydata)  # no observational uncertainties
    # Start the fit offset from the truth
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + 1.)
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD (updates initamp/initmean/initcovar in place)
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # Verify the recovered parameters
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/o uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/o uncertainties'
    return None
Пример #28
0
def TryModel(nGaussiansStar, nGaussiansGalaxy):
    """Fit star/galaxy XD models with the given component counts and score them.

    Uses module-level training/test sets (XTrainStar, XTrainGalaxy, ...).
    Returns (precision, recall, clfstar, clfgalaxy).
    """
    # BUG FIX throughout: Python-3 print functions (were Python-2 print statements)
    print('Star Gaussians: {0}'.format(nGaussiansStar))
    print('Galaxy Gaussians: {0}'.format(nGaussiansGalaxy))

    # Initial GMM fits that ignore the measurement errors
    print('Estimating Gaussians')
    GMMStar = GMM(nGaussiansStar, n_iter=10, covariance_type='full').fit(XTrainStar)
    GMMGalaxy = GMM(nGaussiansGalaxy, n_iter=10, covariance_type='full').fit(XTrainGalaxy)

    ampstar = GMMStar.weights_
    meanstar = GMMStar.means_
    covarstar = GMMStar.covars_

    ampgalaxy = GMMGalaxy.weights_
    meangalaxy = GMMGalaxy.means_
    covargalaxy = GMMGalaxy.covars_

    # Results are saved in `amp`, `mean`, and `covar` (XD updates in place)
    print('Deconvolving star')
    xd.extreme_deconvolution(XTrainStar, XErrTrainStar, ampstar, meanstar, covarstar)

    clfstar = XDGMM(nGaussiansStar)
    clfstar.alpha = ampstar
    clfstar.mu = meanstar
    clfstar.V = covarstar

    print('Deconvolving galaxies')
    xd.extreme_deconvolution(XTrainGalaxy, XErrTrainGalaxy, ampgalaxy, meangalaxy, covargalaxy)

    clfgalaxy = XDGMM(nGaussiansGalaxy)
    clfgalaxy.alpha = ampgalaxy
    clfgalaxy.mu = meangalaxy
    clfgalaxy.V = covargalaxy

    print('Predicting')
    # need to pass XTestStar[i] and XTestGalaxy[i] as np.array([XTestStar[i]]) because internally it assumes 2D matrix
    starPredictions = np.array([predictStar(clfstar, clfgalaxy, np.array([XTestStar[i]]), np.array([XErrTestStar[i]]), i) for i in range(starTestNumber)])
    galaxyPredictions = np.array([predictStar(clfstar, clfgalaxy, np.array([XTestGalaxy[i]]), np.array([XErrTestGalaxy[i]]), i) for i in range(galaxyTestNumber)])

    predictions = np.array(starPredictions.tolist() + galaxyPredictions.tolist())
    results = np.array([1 for i in range(len(starPredictions))] + [0 for i in range(len(galaxyPredictions))])
    report = generateReport(predictions, results)
    return (report['Precision'], report['Recall'], clfstar, clfgalaxy)
def test_dual_gauss_1d_varunc():
    """Recover both components of a 1D two-Gaussian mixture w/ uncertainties."""
    ndata = 3001
    amp_true = 0.3
    # Component assignment: 0 -> N(-2, 1) (amp 0.3), 1 -> N(1, 4) (amp 0.7)
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    # Heteroscedastic uncertainties; perturb the data accordingly
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # initialize fit
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.]])
    initcovar = numpy.zeros((K, 1, 1))
    initcovar[:] = numpy.mean(3. * numpy.var(ydata))
    # Run XD
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # Identify the components by amplitude and check each
    tol = 20. / numpy.sqrt(ndata)
    first = initamp < 0.5
    second = initamp >= 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover correct amp for dual Gaussian w/  uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/  uncertainties'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/  uncertainties'
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover correct amp for dual Gaussian w/  uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/  uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/  uncertainties'
    return None
Пример #30
0
def test_fixmean_single_gauss_1d_nounc():
    """Recover the variance of a 1D Gaussian while its mean is held fixed."""
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T + 1.
    ycovar = numpy.zeros_like(ydata)  # no uncertainties
    # Initialize at the true mean (to be fixed) and an inflated variance
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.array([[1.]])
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD with fixmean=True so only amp and covar get updated
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixmean=True)
    # The mean must be untouched; the variance must be recovered
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 1.) < 10.**-10., \
        'XD did not fixmean for single Gaussian'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/o uncertainties, fixing mean'
    return None
Пример #31
0
def test_single_gauss_1d_varunc_log_loglikeonly():
    # Fit a single 1D Gaussian with variable uncertainties while logging,
    # then check that likeonly=True reproduces the final logged loglike.
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    # Heteroscedastic uncertainties; perturb the data accordingly
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit away from the truth
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD, writing <logfile>_c.log and <logfile>_loglike.log
    logfile = 'test_log'
    extreme_deconvolution(ydata,
                          ycovar,
                          initamp,
                          initmean,
                          initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(
        initmean - 0.
    ) < tol, 'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(
        initcovar - 1.
    ) < tol, 'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now compute the likelihood and check that it is the same as in the logfile
    # (likeonly=True evaluates the loglike without updating the parameters)
    lnl = extreme_deconvolution(ydata,
                                ycovar,
                                initamp,
                                initmean,
                                initcovar,
                                likeonly=True)
    with open(logfile + '_loglike.log') as log:
        lines = log.readlines()
        # lines[-3]: presumably the final loglike entry before trailing
        # summary lines -- depends on the logfile format (TODO confirm)
        assert numpy.fabs(
            float(lines[-3]) - lnl
        ) < 10.**-6., 'loglike computed using likeonly is not the same as in the logfile'
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
Пример #32
0
def measure_kinematics_onepop(tgas,twomass,jk,dm,mj,spii,zbins,options,
                              csvwriter,csvout,maxcovar=30.):
    """Measure vertical kinematics in z bins for one stellar population.

    For each bin in zbins, fits an XD mixture of options.ngauss Gaussians to
    the projected (pmra,pmdec)-based velocities, computes the combined z
    dispersion and kurtosis plus bootstrap errors, and writes one CSV row.
    NOTE(review): twomass, jk, dm and mj are not used in this body --
    presumably kept for signature compatibility; confirm before removing.
    """
    # Compute heliocentric XYZ positions from (ra,dec) and parallax
    lb= bovy_coords.radec_to_lb(tgas['ra'],tgas['dec'],degree=True,epoch=None)
    XYZ= bovy_coords.lbd_to_XYZ(lb[:,0],lb[:,1],1./tgas['parallax'],
                                degree=True)
    # Generate vradec and projection matrix
    vradec= numpy.array([bovy_coords._K/tgas['parallax']*tgas['pmra'],
                         bovy_coords._K/tgas['parallax']*tgas['pmdec']])
    proj= compute_projection(tgas)
    # Sample from the joint (parallax,proper motion) uncertainty distribution 
    # to get the covariance matrix of the vradec, using MC sims
    nmc= 10001
    vradec_cov= compute_vradec_cov_mc(tgas,nmc)
    # Fit each zbin; resume from options.startz only for the starting spii
    if spii == options.start:
        startz= options.startz
    else:
        startz= 0
    for ii in tqdm.trange(startz,len(zbins)-1):
        # Stars in this z slice within 0.2 (kpc, presumably) in the plane
        indx= (XYZ[:,2] > zbins[ii])\
              *(XYZ[:,2] <= zbins[ii+1])\
              *(numpy.sqrt(XYZ[:,0]**2.+XYZ[:,1]**2.) < 0.2)
        nstar= numpy.sum(indx)
        if numpy.sum(indx) < 30: continue  # too few stars for a stable fit
        # Basic XD fit
        ydata= vradec.T[indx]
        ycovar= numpy.zeros_like(vradec.T)[indx]
        # Random initialization of the mixture
        initamp= numpy.random.uniform(size=options.ngauss)
        initamp/= numpy.sum(initamp)
        m= numpy.zeros(3)
        s= numpy.array([40.,40.,20.])
        initmean= []
        initcovar= []
        for jj in range(options.ngauss):
            initmean.append(m+numpy.random.normal(size=3)*s)
            initcovar.append(4.*s**2.*numpy.diag(numpy.ones(3)))
        initcovar= numpy.array(initcovar)
        initmean= numpy.array(initmean)
        lnL= extreme_deconvolution(ydata,ycovar,initamp,initmean,initcovar,
                                   projection=proj[indx])
        # Combined z moments of the fitted mixture
        sig2z= combined_sig2(initamp,initmean[:,2],initcovar[:,2,2],
                             maxcovar=maxcovar)
        kurtz= combined_k(initamp,initmean[:,2],initcovar[:,2,2],
                          maxcovar=maxcovar)
        # Bootstrap errors: 1.4826*MAD is the Gaussian-equivalent sigma
        sam= bootstrap(options.nboot,
                       vradec.T[indx],vradec_cov[indx],proj[indx],
                       ngauss=options.ngauss,maxcovar=maxcovar)
        sig2z_err= 1.4826*numpy.median(numpy.fabs(sam[0]-numpy.median(sam[0])))
        kurtz_err= 1.4826*numpy.median(numpy.fabs(sam[1]-numpy.median(sam[1])))
        sig2kurtz_corr= numpy.corrcoef(sam)[0,1]
        csvwriter.writerow([spii,ii,nstar,
                            sig2z,sig2z_err,kurtz,kurtz_err,sig2kurtz_corr])
        csvout.flush()
    return None
Пример #33
0
def test_single_gauss_1d_varunc_log():
    """1D variable-uncertainty fit that also checks XD's logfiles exist."""
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    # Heteroscedastic uncertainties; perturb the data accordingly
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD, writing <logfile>_c.log and <logfile>_loglike.log
    logfile = 'test_log'
    extreme_deconvolution(ydata,
                          ycovar,
                          initamp,
                          initmean,
                          initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(
        initmean - 0.
    ) < tol, 'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(
        initcovar - 1.
    ) < tol, 'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now test that the logfiles exist and are non-empty
    # (BUG FIX: with-blocks close the handles; the old
    #  sum(1 for line in open(...)) left the files open)
    assert os.path.exists(
        logfile + '_c.log'), 'XD did not produce _c.log logfile when asked'
    with open(logfile + '_c.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(
        logfile +
        '_loglike.log'), 'XD did not produce _loglike.log logfile when asked'
    with open(logfile + '_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def test_single_gauss_1d_nounc():
    """Fit one Gaussian to error-free 1D draws and check mean and variance."""
    ndata = 3001
    draws = numpy.random.normal(size=ndata)
    ydata = numpy.atleast_2d(draws).T
    ycovar = numpy.zeros_like(ydata)  # error-free observations
    # Deliberately biased initial guess
    initamp = numpy.ones(1)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + 1.)
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD (parameters are refined in place)
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # The recovered moments should match the sampling distribution
    tol = 10. / numpy.sqrt(ndata)
    mean_ok = numpy.fabs(initmean - 0.) < tol
    var_ok = numpy.fabs(initcovar - 1.) < tol
    assert mean_ok, 'XD does not recover correct mean for single Gaussian w/o uncertainties'
    assert var_ok, 'XD does not recover correct variance for single Gaussian w/o uncertainties'
    return None
Пример #35
0
def test_ngerrors():
    """Scratch test of XD with per-datum Gaussian-mixture ('ng') errors in 1D.

    With samples=False the noise model is a fixed two-Gaussian mixture
    (means +5 and -3); the samples=True branch instead encodes per-datum
    sampled means with zero covariance.
    """
    samples= False
    if samples:
        ngauss= 10
    else:
        ngauss= 2
    # Generate data
    ndata= 10001
    ydata= numpy.random.normal(size=(ndata,1))
    # Add noise drawn from the two-component mixture
    for ii in range(ndata):
        if not samples:
            if numpy.random.uniform() < 0.5:
                ydata[ii,0]+= numpy.random.normal()+5.
            else:
                ydata[ii,0]+= numpy.random.normal()-3.
    #bovy_plot.bovy_print()
    #bovy_plot.bovy_hist(ydata,bins=101,histtype='step',color='k')
    #bovy_plot.bovy_end_print('/Users/bovy/Desktop/test.png')
    ycovar= numpy.ones((ndata,2))*0.
    ngamp= numpy.ones((ndata,ngauss,1))/ngauss
    ngmean= numpy.zeros((ndata,ngauss,1))
    if samples:
        # Encode the noise as per-datum samples with zero covariance
        for ii in range(ndata):
            for jj in range(ngauss):
                if numpy.random.uniform() < 0.5:
                    ngmean[ii,jj,0]= ydata[ii,0]+(numpy.random.normal()+5.)
                else:
                    ngmean[ii,jj,0]= ydata[ii,0]+(numpy.random.normal()-3.)
            ydata[ii,0]= 0.
        ngcovar= numpy.zeros((ndata,ngauss,1))
    else:
        # Fixed two-component noise model matching the generation above
        ngmean[:,0,0]= 5.
        ngmean[:,1,0]= -3.
        ngcovar= numpy.ones((ndata,ngauss,1))
    xamp= numpy.ones(1)/1.
    xmean= numpy.array([[0.]])
    xcovar= numpy.array([[[0.03821028]]])
    """
    xamp= numpy.ones(2)/2.
    xmean= numpy.array([[0.,0.],[1.,-1.]])
    xcovar= numpy.array([[[ 0.03821028, 0.02108113],
                          [ 0.02014796,  0.03173839]],
                         [[ 0.06219194,  0.02302473],
                          [ 0.02738021,  0.06778009]]])
    """
    l= extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar,
                             ng=True,
                             ngamp=ngamp,
                             ngmean=ngmean,
                             ngcovar=ngcovar)
    # BUG FIX: Python-3 print functions (were Python-2 print statements)
    print(l)
    print(xamp, xmean, xcovar)
Пример #36
0
def test_triple_gauss_1d_varunc_snm():
    # Generate data from three Gaussians, recover amp, mean and variance,
    # also running XD's split-and-merge (maxsnm=True).
    # True components: amp_true=[0.3,0.1,0.6] drawing from
    # N(-4,1), N(1,4) and N(8,1.5^2) respectively.
    ndata= 3001
    amp_true= [0.3,0.1,0.6]
    assign= numpy.random.choice(numpy.arange(3),p=amp_true,size=ndata)
    ydata= numpy.zeros((ndata,1))
    ydata[assign==0,0]= numpy.random.normal(size=numpy.sum(assign==0))-4.
    ydata[assign==1,0]= numpy.random.normal(size=numpy.sum(assign==1))*2.+1.
    ydata[assign==2,0]= numpy.random.normal(size=numpy.sum(assign==2))*1.5+8.
    # Heteroscedastic uncertainties; perturb the data accordingly
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit
    K= 3
    initamp= numpy.ones(K)/float(K)
    initmean= numpy.array([[-1.],[0.],[1.]])
    initcovar= numpy.zeros((K,1,1))
    for kk in range(K):
        initcovar[kk]= numpy.mean(3.*numpy.var(ydata))
    # Run XD
    extreme_deconvolution(ydata,ycovar,initamp,initmean,initcovar,
                          maxsnm=True)
    # Test: identify the components by their fitted amplitudes
    tol= 25./numpy.sqrt(ndata)
    first= initamp > 0.5  # the amp=0.6 component: N(8,1.5^2)
    assert numpy.fabs(initamp[first]-amp_true[2]) < tol, 'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[first]-8.) < tol, 'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[first]-1.5**2.) < tol, 'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    second= (initamp <= 0.5)*(initamp > 0.2)  # the amp=0.3 component: N(-4,1)
    assert numpy.fabs(initamp[second]-amp_true[0]) < tol, 'XD does not recover correct amp for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initmean[second]--4.) < 2.*tol, 'XD does not recover correct mean for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initcovar[second]-1.) < 2.*tol, 'XD does not recover correct variance for triple Gaussian w/  uncertainties'
    third= (initamp <= 0.2)  # the amp=0.1 component: N(1,4)
    assert numpy.fabs(initamp[third]-amp_true[1]) < tol, 'XD does not recover correct amp for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initmean[third]-1.) < 4.*tol, 'XD does not recover correct mean for triple Gaussian w/  uncertainties'
    assert numpy.fabs(initcovar[third]-4.) < 6.*tol, 'XD does not recover correct variance for triple Gaussian w/  uncertainties'
    return None
Пример #37
0
def xd(data, init_xdtarget):
    """Run extreme deconvolution on ``data`` starting from ``init_xdtarget``.

    NOTE(review): extreme_deconvolution refines the amplitude/mean/covariance
    arrays in place, so init_xdtarget's arrays are modified as a side effect;
    the returned xdtarget wraps those same (now fitted) arrays.
    """
    initamp = init_xdtarget.amp
    initmean = init_xdtarget.mean
    initcovar = init_xdtarget.covar

    # Optional per-datum (possibly logarithmic) weights on the data object
    weight = getattr(data, 'weight', None)
    logweight = getattr(data, 'logweight', False)

    extreme_deconvolution(data.a, data.acov, initamp, initmean, initcovar,
                          weight=weight, logweight=logweight)

    return xdtarget(amp=initamp, mean=initmean, covar=initcovar)
Пример #38
0
 def fit(self, X, Xerr): 
     ''' Fit the XD Gaussian mixture to data X with uncertainties Xerr.

     The initial guess comes from a plain GMM fit that ignores the
     uncertainties; extreme_deconvolution then refines amplitudes, means
     and covariances in place.  Results are stored on the instance as
     weights_, means_, covariances_ and the final log-likelihood l.
     '''
     from extreme_deconvolution import extreme_deconvolution
     # Validate/reshape the inputs (project-defined check)
     X, Xerr = self._X_check(X, Xerr)
     self._X = X 
     self._Xerr = Xerr
     # Error-ignoring GMM fit as initialization; copy since XD mutates in place
     gmm = GMM(self.n_components, n_iter=10, covariance_type='full').fit(X)
     w, m, c = gmm.weights_.copy(), gmm.means_.copy(), gmm.covars_.copy()
     l = extreme_deconvolution(X, Xerr, w, m, c)
     self.l = l 
     self.weights_ = w 
     self.means_ = m
     self.covariances_ = c
     return None
Пример #39
0
def deconvolveAbundances(options,args):
    """Deconvolve the [Fe/H]-[a/Fe] distribution with a 2-Gaussian XD fit.

    Reads the raw data, runs extreme deconvolution with homoscedastic
    abundance errors (options.dfeh, options.dafe), and pickles the
    resulting (amp, mean, covar) to options.xdfile.
    """
    if options.xdfile is None:
        # Python-3 print functions (were Python-2 print statements)
        print("'xdfile' option needs to be set ...")
        print("Returning ...")
        return
    if os.path.exists(options.xdfile):
        return #Nothing to do
    #Load data
    raw= readRealData(options,args)
    #Deconvolve using XD, setup data
    ydata= numpy.zeros((len(raw),2))
    ycovar= numpy.zeros((len(raw),2))
    ydata[:,0]= raw.feh
    ydata[:,1]= raw.afe
    ycovar[:,0]= options.dfeh**2.
    # BUG FIX: the [a/Fe] variance used to overwrite column 0 instead of
    # filling column 1, leaving the [a/Fe] errors at zero
    ycovar[:,1]= options.dafe**2.
    #setup initial cond
    xamp= numpy.ones(2)/2.
    xmean= numpy.zeros((2,2))
    xmean[0,0]= 0. #Solar abundances
    xmean[0,1]= 0.
    xmean[1,0]= -0.6 #"thick" abundances
    xmean[1,1]= 0.35
    xcovar= numpy.zeros((2,2,2))
    xcovar[0,0,0]= 0.2**2.
    xcovar[1,0,0]= 0.2**2.
    xcovar[0,1,1]= 0.1**2.
    xcovar[1,1,1]= 0.1**2.
    #Run XD
    extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar)
    #Save (with-block guarantees the pickle file gets closed)
    with open(options.xdfile,'wb') as outfile:
        pickle.dump(xamp,outfile)
        pickle.dump(xmean,outfile)
        pickle.dump(xcovar,outfile)
Пример #40
0
def bootstrap(nboot, vrd, vrd_cov, proj, ngauss=2, maxcovar=30.):
    """Bootstrap the combined sigma_z^2 and kurtosis estimates.

    Resamples (velocity, covariance, projection) with replacement nboot
    times, refits an XD mixture each time, and returns a (2, nboot) array
    with combined_sig2 in row 0 and combined_k in row 1.
    """
    out = numpy.empty((2, nboot))
    for boot in range(nboot):
        # Draw indices with replacement
        indx = numpy.floor(numpy.random.uniform(size=len(vrd))
                           * len(vrd)).astype('int')
        ydata = vrd[indx]
        ycovar = vrd_cov[indx]
        # Random initialization of the mixture
        initamp = numpy.random.uniform(size=ngauss)
        initamp /= numpy.sum(initamp)
        m = numpy.zeros(3)
        s = numpy.array([40., 40., 20.])
        initmean = numpy.array([m + numpy.random.normal(size=3) * s
                                for _ in range(ngauss)])
        initcovar = numpy.array([4. * s**2. * numpy.diag(numpy.ones(3))
                                 for _ in range(ngauss)])
        lnL = extreme_deconvolution(ydata, ycovar, initamp, initmean,
                                    initcovar, projection=proj[indx])
        # Combined z moments of the fitted mixture
        out[0, boot] = combined_sig2(initamp, initmean[:, 2],
                                     initcovar[:, 2, 2], maxcovar=maxcovar)
        out[1, boot] = combined_k(initamp, initmean[:, 2],
                                  initcovar[:, 2, 2], maxcovar=maxcovar)
    return out
# Module-level script: fit star and galaxy GMMs, deconvolve with XD, and
# wrap the results in XDGMM classifiers (relies on globals defined earlier
# in the file).
GMMStar = GMM(nGaussiansStar, n_iter = 10, covariance_type='full').fit(XTrainStar)
GMMGalaxy = GMM(nGaussiansGalaxy, n_iter=10, covariance_type='full').fit(XTrainGalaxy)

ampstar = GMMStar.weights_
meanstar = GMMStar.means_
covarstar = GMMStar.covars_

ampgalaxy = GMMGalaxy.weights_
meangalaxy = GMMGalaxy.means_
covargalaxy = GMMGalaxy.covars_


# Results are saved in `amp`, `mean`, and `covar`
# BUG FIX: Python-3 print functions (were Python-2 print statements)
print('Deconvolving star')

xd.extreme_deconvolution(XTrainStar, XErrTrainStar, ampstar, meanstar, covarstar)

clfstar = XDGMM(nGaussiansStar)
clfstar.alpha = ampstar
clfstar.mu = meanstar
clfstar.V = covarstar


print('Deconvolving galaxies')
xd.extreme_deconvolution(XTrainGalaxy, XErrTrainGalaxy, ampgalaxy, meangalaxy, covargalaxy)

clfgalaxy = XDGMM(nGaussiansGalaxy)
clfgalaxy.alpha = ampgalaxy
clfgalaxy.mu = meangalaxy
clfgalaxy.V = covargalaxy
Пример #42
0
def plot_mapflarepdf(savename, plotname):
    """Plot the PDFs of the inverse flare scale length R_flare^{-1} for the
    mono-abundance populations (MAPs).

    Each selected MAP's MCMC samples (column 4 of ``samples``) are fit with
    a 2-Gaussian XD decomposition; the fits are cached in ``savename`` and
    the figure is written to ``plotname``.  Whether the low- or high-alpha
    MAPs are plotted is keyed off the substring 'lowalpha' in ``savename``.
    """
    # Load the samples
    with open('../mapfits/tribrokenexpflare.sav', 'rb') as savefile:
        bf = numpy.array(pickle.load(savefile))
        samples = numpy.array(pickle.load(savefile))
    maps = define_rcsample.MAPs()
    # Loop through the low-alpha MAPs and compute the XD decomposition
    if 'lowalpha' in savename:
        plotmaps = [9, 16, 23, 29, 36, 43, 50, 57, 64, 71]
    else:
        plotmaps = [19, 26, 32, 39, 45]
    if not os.path.exists(savename):
        ngauss = 2
        allxamp = numpy.empty((len(plotmaps), ngauss))
        allxmean = numpy.empty((len(plotmaps), ngauss, 1))
        allxcovar = numpy.empty((len(plotmaps), ngauss, 1, 1))
        cnt = 0
        for ii, map in enumerate(maps.map()):
            if not ii in plotmaps: continue
            print ii
            # Fit PDFs with XD
            xamp = numpy.array([0.45, 0.5])
            # Two means scattered randomly around the sample mean
            xmean = numpy.array([
                numpy.mean(samples[ii, 4]) +
                numpy.random.normal() * numpy.std(samples[ii, 4]),
                numpy.mean(samples[ii, 4]) +
                numpy.random.normal() * numpy.std(samples[ii, 4])
            ])[:, numpy.newaxis]
            xcovar = numpy.reshape(
                numpy.tile(numpy.var(samples[ii, 4]), (2, 1)), (2, 1, 1))
            # Zero uncertainties: XD here acts as a plain GMM fit
            XD.extreme_deconvolution(samples[ii, 4][:, numpy.newaxis],
                                     numpy.zeros((len(samples[ii, 4]), 1)),
                                     xamp, xmean, xcovar)
            allxamp[cnt] = xamp
            allxmean[cnt] = xmean
            allxcovar[cnt] = xcovar
            cnt += 1
        save_pickles(savename, allxamp, allxmean, allxcovar)
    else:
        # Re-use the cached decompositions
        with open(savename, 'rb') as savefile:
            allxamp = pickle.load(savefile)
            allxmean = pickle.load(savefile)
            allxcovar = pickle.load(savefile)
    # Now plot
    cmap = cm.coolwarm
    xrange = [-0.37, 0.25]
    if 'lowalpha' in savename:
        #        xrange= [-0.4,0.2]
        yrange = [0., 30.]
        combDiv = 2.
        colorFunc = lambda x: cmap((x + 0.6) * 0.95 / 0.9 + 0.05)
    else:
        #        xrange= [-0.3,0.3]
        yrange = [0., 13.5]
        colorFunc = lambda x: cmap((x + 0.5) * 0.95 / 0.5 + 0.05)
        combDiv = 1.5
    overplot = False
    plotXDFit = True
    cnt = 0
    bovy_plot.bovy_print(axes_labelsize=18,
                         text_fontsize=18,
                         xtick_labelsize=14,
                         ytick_labelsize=14)
    for ii, map in enumerate(maps.map()):
        if not ii in plotmaps: continue
        # Color histograms by the (rounded) median [Fe/H] of the MAP
        tfeh = round(numpy.median(map['FE_H']) * 20.) / 20.
        if tfeh == 0.25: tfeh = 0.3
        if tfeh == -0.1: tfeh = -0.1
        bovy_plot.bovy_hist(
            samples[ii, 4],
            range=xrange,
            bins=51,
            overplot=overplot,
            yrange=yrange,
            histtype='step',
            normed=True,
            zorder=2,
            color=colorFunc(tfeh),
            xlabel=r'$R_{\mathrm{flare}}^{-1}\,(\mathrm{kpc}^{-1})$')
        if plotXDFit:
            # Overplot the 2-Gaussian XD fit on top of the histogram
            txs = numpy.linspace(xrange[0], xrange[1], 1001)
            pyplot.plot(
                txs,
                1. / numpy.sqrt(2. * numpy.pi) *
                (allxamp[cnt, 0] / numpy.sqrt(allxcovar[cnt, 0, 0, 0]) *
                 numpy.exp(-0.5 * (txs - allxmean[cnt, 0, 0])**2. /
                           allxcovar[cnt, 0, 0, 0]) +
                 allxamp[cnt, 1] / numpy.sqrt(allxcovar[cnt, 1, 0, 0]) *
                 numpy.exp(-0.5 * (txs - allxmean[cnt, 1, 0])**2. /
                           allxcovar[cnt, 1, 0, 0])),
                color=colorFunc(tfeh),
                zorder=1)
        overplot = True
        cnt += 1
    # Combined PDF: the (normalized) product of the individual XD fits
    txs = numpy.linspace(xrange[0], xrange[1], 1001)
    comb = numpy.ones_like(txs)
    for ii in range(len(plotmaps)):
        comb *= 1. / numpy.sqrt(2. * numpy.pi) * (
            allxamp[ii, 0] / numpy.sqrt(allxcovar[ii, 0, 0, 0]) *
            numpy.exp(-0.5 *
                      (txs - allxmean[ii, 0, 0])**2. / allxcovar[ii, 0, 0, 0])
            + allxamp[ii, 1] / numpy.sqrt(allxcovar[ii, 1, 0, 0]) *
            numpy.exp(-0.5 *
                      (txs - allxmean[ii, 1, 0])**2. / allxcovar[ii, 1, 0, 0]))
    comb /= numpy.sum(comb) * (txs[1] - txs[0])
    pyplot.plot(txs, comb / combDiv, 'k-', lw=2., zorder=20)
    pyplot.plot([0., 0.], [0., 50.], 'k--', lw=1.5, zorder=0)
    # Annotate with the mean +/- std. dev. of the combined PDF
    t = pyplot.text(
        xrange[0] + 0.25 * (xrange[1] - xrange[0]) + 0.03 *
        ('highalpha' in savename),
        0.8 * yrange[1],
        r'$R_{\mathrm{flare}}^{-1} = %.2f \pm %.2f\,\mathrm{kpc}^{-1}$' %
        (numpy.sum(comb * txs) / numpy.sum(comb),
         numpy.sqrt(
             numpy.sum(comb * txs**2.) / numpy.sum(comb) -
             (numpy.sum(comb * txs) / numpy.sum(comb))**2.)),
        size=18.)
    t.set_bbox(dict(color='w', edgecolor='none'))
    if 'lowalpha' in savename:
        bovy_plot.bovy_text(
            r'$\mathrm{low-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
            top_left=True,
            size=16.)
    else:
        bovy_plot.bovy_text(
            r'$\mathrm{high-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
            top_left=True,
            size=16.)
    bovy_plot.bovy_end_print(plotname)
    return None
Пример #43
0
def test_triple_gauss_1d_varunc_snm_log():
    """Recover a 3-Gaussian 1D mixture with heteroscedastic uncertainties
    using the split-and-merge algorithm (maxsnm=True), and check that the
    requested logfiles ('_c.log' and '_loglike.log') are written and
    non-empty.
    """
    # Like in oned, but also log
    ndata = 3001
    amp_true = [0.1, 0.3, 0.6]
    assign = numpy.random.choice(numpy.arange(3), p=amp_true, size=ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0,
          0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 4.
    ydata[assign == 1,
          0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ydata[assign == 2,
          0] = numpy.random.normal(size=numpy.sum(assign == 2)) * 1.5 + 8.
    # Heteroscedastic uncertainties, drawn uniformly per data point
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit
    K = 3
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.], [1.]])
    initcovar = numpy.zeros((K, 1, 1))
    for kk in range(K):
        initcovar[kk] = numpy.mean(3. * numpy.var(ydata))
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata,
                          ycovar,
                          initamp,
                          initmean,
                          initcovar,
                          maxsnm=True,
                          logfile=logfile)
    # Test: identify the components by their fitted amplitudes
    tol = 25. / numpy.sqrt(ndata)
    first = initamp > 0.5
    assert numpy.fabs(
        initamp[first] - amp_true[2]
    ) < tol, 'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(
        initmean[first] - 8.
    ) < tol, 'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(
        initcovar[first] - 1.5**2.
    ) < tol, 'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    second = (initamp <= 0.5) * (initamp > 0.2)
    assert numpy.fabs(
        initamp[second] - amp_true[0]
    ) < tol, 'XD does not recover correct amp for triple Gaussian w/  uncertainties'
    assert numpy.fabs(
        initmean[second] - 1.
    ) < 4. * tol, 'XD does not recover correct mean for triple Gaussian w/  uncertainties'
    assert numpy.fabs(
        initcovar[second] - 4.
    ) < 4. * tol, 'XD does not recover correct variance for triple Gaussian w/  uncertainties'
    third = (initamp <= 0.2)
    assert numpy.fabs(
        initamp[third] - amp_true[1]
    ) < tol, 'XD does not recover correct amp for triple Gaussian w/  uncertainties'
    assert numpy.fabs(
        initmean[third] - -4.
    ) < 2. * tol, 'XD does not recover correct mean for triple Gaussian w/  uncertainties'
    assert numpy.fabs(
        initcovar[third] - 1.
    ) < 2. * tol, 'XD does not recover correct variance for triple Gaussian w/  uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(
        logfile + '_c.log'), 'XD did not produce _c.log logfile when asked'
    # Count lines with context managers so the file handles are closed
    # before os.remove (the original genexpr leaked the handles, which
    # makes the removal fail on Windows)
    with open(logfile + '_c.log') as clogfile:
        num_lines = sum(1 for line in clogfile)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(
        logfile +
        '_loglike.log'), 'XD did not produce _loglike.log logfile when asked'
    with open(logfile + '_loglike.log') as loglikefile:
        num_lines = sum(1 for line in loglikefile)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
Пример #44
0
def plot_mapflarepdf(savename,plotname):
    """Plot the PDFs of the inverse flare scale length R_flare^{-1} for the
    mono-abundance populations (MAPs), fitting each MAP's samples with a
    2-Gaussian XD decomposition (cached in ``savename``) and writing the
    figure to ``plotname``.

    NOTE(review): this is an (unformatted) duplicate of the version of
    plot_mapflarepdf that appears earlier in this file.
    """
    # Load the samples
    with open('../mapfits/tribrokenexpflare.sav','rb') as savefile:
        bf= numpy.array(pickle.load(savefile))
        samples= numpy.array(pickle.load(savefile))
    maps= define_rcsample.MAPs()
    # Loop through the low-alpha MAPs and compute the XD decomposition
    if 'lowalpha' in savename:
        plotmaps= [9,16,23,29,36,43,50,57,64,71]
    else:
        plotmaps= [19,26,32,39,45]
    if not os.path.exists(savename):
        ngauss= 2
        allxamp= numpy.empty((len(plotmaps),ngauss))
        allxmean= numpy.empty((len(plotmaps),ngauss,1))
        allxcovar= numpy.empty((len(plotmaps),ngauss,1,1))
        cnt= 0
        for ii, map in enumerate(maps.map()):
            if not ii in plotmaps: continue
            print ii
            # Fit PDFs with XD
            xamp= numpy.array([0.45,0.5])
            # Two means scattered randomly around the sample mean
            xmean= numpy.array([numpy.mean(samples[ii,4])
                                +numpy.random.normal()*numpy.std(samples[ii,4]),
                                numpy.mean(samples[ii,4])
                                +numpy.random.normal()*numpy.std(samples[ii,4])])[:,numpy.newaxis]
            xcovar= numpy.reshape(numpy.tile(numpy.var(samples[ii,4]),(2,1)),
                                  (2,1,1))
            # Zero uncertainties: XD here acts as a plain GMM fit
            XD.extreme_deconvolution(samples[ii,4][:,numpy.newaxis],
                                     numpy.zeros((len(samples[ii,4]),1)),
                                     xamp,xmean,xcovar)
            allxamp[cnt]= xamp
            allxmean[cnt]= xmean
            allxcovar[cnt]= xcovar
            cnt+= 1
        save_pickles(savename,allxamp,allxmean,allxcovar)
    else:
        # Re-use the cached decompositions
        with open(savename,'rb') as savefile:
            allxamp= pickle.load(savefile)
            allxmean= pickle.load(savefile)
            allxcovar= pickle.load(savefile)
    # Now plot
    cmap= cm.coolwarm
    xrange= [-0.37,0.25]
    if 'lowalpha' in savename:
#        xrange= [-0.4,0.2]
        yrange= [0.,30.]
        combDiv= 2.
        colorFunc= lambda x: cmap((x+0.6)*0.95/0.9+0.05)
    else:
#        xrange= [-0.3,0.3]
        yrange= [0.,13.5]
        colorFunc= lambda x: cmap((x+0.5)*0.95/0.5+0.05)
        combDiv= 1.5
    overplot= False
    plotXDFit= True
    cnt= 0
    bovy_plot.bovy_print(axes_labelsize=18,text_fontsize=18,
                         xtick_labelsize=14,ytick_labelsize=14)
    for ii, map in enumerate(maps.map()):
        if not ii in plotmaps: continue
        # Color histograms by the (rounded) median [Fe/H] of the MAP
        tfeh= round(numpy.median(map['FE_H'])*20.)/20.
        if tfeh == 0.25: tfeh= 0.3
        if tfeh == -0.1: tfeh= -0.1
        bovy_plot.bovy_hist(samples[ii,4],
                            range=xrange,bins=51,overplot=overplot,
                            yrange=yrange,
                            histtype='step',normed=True,zorder=2,
                            color=colorFunc(tfeh),
                            xlabel=r'$R_{\mathrm{flare}}^{-1}\,(\mathrm{kpc}^{-1})$')
        if plotXDFit:
            # Overplot the 2-Gaussian XD fit on top of the histogram
            txs= numpy.linspace(xrange[0],xrange[1],1001)
            pyplot.plot(txs,1./numpy.sqrt(2.*numpy.pi)*(allxamp[cnt,0]/numpy.sqrt(allxcovar[cnt,0,0,0])*numpy.exp(-0.5*(txs-allxmean[cnt,0,0])**2./allxcovar[cnt,0,0,0])
                                                 +allxamp[cnt,1]/numpy.sqrt(allxcovar[cnt,1,0,0])*numpy.exp(-0.5*(txs-allxmean[cnt,1,0])**2./allxcovar[cnt,1,0,0])),
                        color=colorFunc(tfeh),
                        zorder=1)
        overplot=True
        cnt+= 1
    # Combined PDF: the (normalized) product of the individual XD fits
    txs= numpy.linspace(xrange[0],xrange[1],1001)
    comb= numpy.ones_like(txs)
    for ii in range(len(plotmaps)):
        comb*= 1./numpy.sqrt(2.*numpy.pi)*(allxamp[ii,0]/numpy.sqrt(allxcovar[ii,0,0,0])*numpy.exp(-0.5*(txs-allxmean[ii,0,0])**2./allxcovar[ii,0,0,0])
                                           +allxamp[ii,1]/numpy.sqrt(allxcovar[ii,1,0,0])*numpy.exp(-0.5*(txs-allxmean[ii,1,0])**2./allxcovar[ii,1,0,0]))
    comb/= numpy.sum(comb)*(txs[1]-txs[0])
    pyplot.plot(txs,comb/combDiv,'k-',lw=2.,zorder=20)
    pyplot.plot([0.,0.],[0.,50.],'k--',lw=1.5,zorder=0)
    # Annotate with the mean +/- std. dev. of the combined PDF
    t= pyplot.text(xrange[0]+0.25*(xrange[1]-xrange[0])+0.03*('highalpha' in savename),
                        0.8*yrange[1],
                        r'$R_{\mathrm{flare}}^{-1} = %.2f \pm %.2f\,\mathrm{kpc}^{-1}$' % (numpy.sum(comb*txs)/numpy.sum(comb), numpy.sqrt(numpy.sum(comb*txs**2.)/numpy.sum(comb)-(numpy.sum(comb*txs)/numpy.sum(comb))**2.)),
                        size=18.)
    t.set_bbox(dict(color='w',edgecolor='none'))
    if 'lowalpha' in savename:
        bovy_plot.bovy_text(r'$\mathrm{low-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
                            top_left=True,
                            size=16.)
    else:
        bovy_plot.bovy_text(r'$\mathrm{high-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
                            top_left=True,
                            size=16.)
    bovy_plot.bovy_end_print(plotname)
    return None
Пример #45
0
def _init_xd_2d(ydata, ngauss):
    """Build initial XD parameters for a 2D fit with ``ngauss`` components.

    Amplitudes are uniform; each mean is the sample mean scattered by a
    quarter of the sample standard deviation; covariances start as the
    diagonal sample variances.  Returns (xamp, xmean, xcovar); the arrays
    are subsequently updated in place by extreme_deconvolution.
    """
    xamp= numpy.ones(ngauss)/ngauss
    xmean= numpy.zeros((ngauss,2))
    xcovar= numpy.zeros((ngauss,2,2))
    for ii in range(ngauss):
        xmean[ii,:]= numpy.mean(ydata,axis=0)+numpy.std(ydata,axis=0)*numpy.random.normal(size=(2))/4.
        xcovar[ii,0,0]= numpy.var(ydata[:,0])
        xcovar[ii,1,1]= numpy.var(ydata[:,1])
    return (xamp,xmean,xcovar)

def XDPotPDFs(options,args):
    """Fit extreme-deconvolution Gaussian mixtures to three 2D marginal
    PDFs of the potential-fit MCMC chains and pickle the results.

    The three fits are (1) log disk scale length vs. logit(Vc,disk/Vc),
    (2) log DM density vs. logit(halo power-law index / 3), and
    (3) log Vc vs. dlnVc/dlnR.  The (amp, mean, covar) triples of all
    three fits are saved to options.plotfile via save_pickles.
    """
    #First load the chains
    savefile= open(args[0],'rb')
    thesesamples= pickle.load(savefile)
    savefile.close()
    if not options.derivedfile is None:
        if os.path.exists(options.derivedfile):
            derivedfile= open(options.derivedfile,'rb')
            derivedsamples= pickle.load(derivedfile)
            derivedfile.close()
        else:
            raise IOError("--derivedfile given but does not exist ...")
    samples= {}
    scaleDict= {}
    paramnames= ['rd','vc','zh','fh','dlnvcdlnr','usun','vsun']
    scale= [_REFR0,_REFV0,1000.*_REFR0,1.,1./30.*_REFV0/_REFR0,_REFV0,_REFV0]
    if len(thesesamples[0]) == 5:
        # Shorter chains lack the two solar-motion parameters
        paramnames.pop()
        paramnames.pop()
        scale.pop()
        scale.pop()
    if not options.derivedfile is None:
        paramnames.extend(['surfz','surfzdisk','rhodm',
                           'rhoo','massdisk','plhalo','vcdvc'])
        scale.extend([1.,1.,1.,1.,1.,1.,1.])
    for kk in range(len(thesesamples[0])):
        xs= numpy.array([s[kk] for s in thesesamples])
        if paramnames[kk] == 'rd' or paramnames[kk] == 'zh':
            # These parameters are sampled in log
            xs= numpy.exp(xs)
        samples[paramnames[kk]]= xs
        scaleDict[paramnames[kk]]= scale[kk]
    if not options.derivedfile is None:
        for ll in range(len(thesesamples[0]),
                        len(thesesamples[0])+7):#len(derivedsamples[0])):
            kk= ll-len(thesesamples[0])
            xs= numpy.array([s[kk] for s in derivedsamples])
            samples[paramnames[ll]]= xs
            scaleDict[paramnames[ll]]= scale[ll]
    #Now fit XD to the three 2D PDFs; the samples are exact draws, so the
    #per-point uncertainties are zero (ycovar given in diagonal form)
    #1) Vd/v vs. Rd
    ydata= numpy.zeros((len(samples['vcdvc']),2))
    ycovar= numpy.zeros((len(samples['vcdvc']),2))
    ydata[:,0]= numpy.log(samples['rd'])
    ydata[:,1]= special.logit(samples['vcdvc'])
    vcdxamp,vcdxmean,vcdxcovar= _init_xd_2d(ydata,options.g)
    extreme_deconvolution.extreme_deconvolution(ydata,ycovar,
                                                vcdxamp,vcdxmean,vcdxcovar)
    #2) alpha_dm vs. rho_dm
    ydata= numpy.zeros((len(samples['rhodm']),2))
    ycovar= numpy.zeros((len(samples['rhodm']),2))
    ydata[:,0]= numpy.log(samples['rhodm'])
    ydata[:,1]= special.logit(samples['plhalo']/3.)
    rhodmxamp,rhodmxmean,rhodmxcovar= _init_xd_2d(ydata,options.g)
    extreme_deconvolution.extreme_deconvolution(ydata,ycovar,
                                                rhodmxamp,rhodmxmean,rhodmxcovar)
    #3) dlnvcdlnr vs. vc
    ydata= numpy.zeros((len(samples['vc']),2))
    ycovar= numpy.zeros((len(samples['vc']),2))
    ydata[:,0]= numpy.log(samples['vc'])
    ydata[:,1]= samples['dlnvcdlnr']
    vcxamp,vcxmean,vcxcovar= _init_xd_2d(ydata,options.g)
    extreme_deconvolution.extreme_deconvolution(ydata,ycovar,
                                                vcxamp,vcxmean,vcxcovar)
    save_pickles(options.plotfile,vcdxamp,vcdxmean,vcdxcovar,
                 rhodmxamp,rhodmxmean,rhodmxcovar,
                 vcxamp,vcxmean,vcxcovar)
    return None
Пример #46
0
                # NOTE(review): this example is a truncated fragment -- the
                # enclosing function/loops (ivel, irad, vals, ydata, ycovar,
                # gauxd_*, xcovar, it, ...) are defined outside this view,
                # and it appears to splice two different snippets together
                # around the 'for plot' line below; treat with caution.
                # Random initial amplitudes, normalized to sum to one
                initamp = np.random.uniform(size=ngauss)
                initamp /= np.sum(initamp)
                # Robust location/scale: median and MAD-based sigma estimate
                m = np.median(vals)
                s = 1.4826 * np.median(np.fabs(vals - m))
                print ' iv, ir initial guess of median, sig=', ivel, irad, m, s
                initmean = []
                initcovar = []
                for ii in range(ngauss):
                    initcovar.append(s**2.)
                initcovar = np.array([[initcovar]]).T
                # Now let the means vary
                for ii in range(ngauss):
                    initmean.append(m + np.random.normal() * s)
                initmean = np.array([initmean]).T
                print("iv, ir, lnL",ivel,irad, \
                    extreme_deconvolution(ydata,ycovar, \
                    initamp,initmean,initcovar))
                print("iv, ir, amp, mean, std. dev.",ivel,irad, \
                    initamp,initmean[:,0], \
                    np.sqrt(initcovar[:,0,0]))
                # store the amp and mean
                # sort with amplitude
                sortindx = np.argsort(initamp)
                sortindx = sortindx[::-1]
                # print ' sorted amp, mean = ', initamp[sortindx], \
                #    initmean[sortindx,0]
                gauxd_amp[irad, :] = initamp[sortindx]
                gauxd_mean[irad, :] = initmean[sortindx, 0]
                gauxd_std[irad, :] = np.sqrt(initcovar[sortindx, 0, 0])
                gauxd_rr[irad] = rr

                # for plot
                xcovar[i][3][3] = sub[3]
            else:
                xcovar[i][0][0] = neu_sigma[i, 0] / 4
                xcovar[i][1][1] = neu_sigma[i, 1] / 4
                xcovar[i][2][2] = neu_sigma[i, 2] / 4
                xcovar[i][3][3] = neu_sigma[i, 3] / 4
    # Dump the XD inputs before the run for later comparison
    with open('before_dec.txt', 'w') as filehandle:
        filehandle.write("start xmean \n" + str(xmean) + '\n')
        filehandle.write("start xamp \n" + str(xamp1) + '\n')
        filehandle.write("start xcovar: \n" + str(xcovar) + '\n')

    # Time the deconvolution; arrays are updated in place
    t0 = time.time()
    l = extreme_deconvolution(ydata,
                              ycovar,
                              xamp1,
                              xmean,
                              xcovar,
                              weight=weights,
                              maxiter=it,
                              fixmean=True)
    t1 = time.time()
    xdc_time = t1 - t0

    with open('after_dec.txt', 'w') as filehandle:
        filehandle.write("new xmean \n" + str(xmean) + '\n')
        filehandle.write("new xamp \n" + str(xamp1) + '\n')
        filehandle.write("new xcovar: \n" + str(xcovar) + '\n')

    print(str(it) + " iteration(s) done")
    times[it] = xdc_time
    log_like[it] = l
    timesfile = open('iters_time', 'wb')
Пример #48
0
def xdGamma(parser):
    """Run extreme deconvolution on per-object posterior samples of the
    structure-function power-law index gamma (in log).

    Reads pickled samples from args[0], summarizes each object's samples
    by mean/variance of log(gamma), fits each object's sample scatter with
    options.g Gaussians (the 'ng' per-object error mixtures), then runs a
    global XD fit with options.k components and pickles the result to
    options.outfilename.

    NOTE(review): Python 2 code (print statements).  The branch under
    'options.savefits' is unreachable (a NotImplementedError is raised
    first) and references undefined names (outparams, weights); the
    builtin 'type' is shadowed by the pickled model type.
    """
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if options.outfilename is None:
        print "-o filename options needs to be set ..."
        print "Returning ..."
        return None
    if os.path.exists(options.outfilename):
        print options.outfilename + " exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    numpy.random.seed(seed=options.seed)
    # Restore samples
    savefilename = args[0]
    print "Reading data ..."
    if os.path.exists(savefilename):
        savefile = open(savefilename, "rb")
        samples = pickle.load(savefile)
        type = pickle.load(savefile)
        band = pickle.load(savefile)
        mean = pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    # Prepare samples for XD
    print "Preparing data ..."
    if type == "powerlawSF":
        if len(band) > 1:
            print "multi-band not implemented yet ..."
            print "Returning ..."
            return
        else:
            nparams = 1  # RITABAN 2 for gamma and A
    elif type == "DRW":
        print "DRW not implemented yet ..."
        print "Returning ..."
        return
    elif type == "KS11":
        nparams = 1
    elif type == "scatter":
        print "scatter not implemented yet ..."
        print "Returning ..."
        return
    ndata = len(samples)
    ydata = numpy.zeros((ndata, nparams))
    ycovar = numpy.zeros((ndata, nparams, nparams))
    # Per-object error mixtures ('ng' arrays) passed to the global XD fit
    ngamp = numpy.zeros((ndata, options.g))
    ngmean = numpy.zeros((ndata, options.g, nparams))
    ngcovar = numpy.zeros((ndata, options.g, nparams, nparams))
    for ii, key in enumerate(samples.keys()):
        sys.stdout.write("\r" + _ERASESTR + "\r")
        sys.stdout.flush()
        sys.stdout.write("\rWorking on preparing %i / %i\r" % (ii + 1, ndata))
        sys.stdout.flush()
        if type == "powerlawSF":
            # Stack as A,g,Ac,gc
            loggammas = []
            # logAs= [] RITABAN
            for sample in samples[key]:
                loggammas.append(numpy.log(sample["gamma"][0]))
                # logAs.append(numpy.log(sample['logA'][0])) RITABAN
            loggammas = numpy.array(loggammas)
            ydata[ii, 0] = numpy.mean(loggammas)
            ycovar[ii, 0, 0] = numpy.var(loggammas)
            # logAs= numpy.array(logAs) RITABAN
            # ydata[ii,1]= numpy.mean(logAs) RITABAN
            # ycovar[ii,1,1]= numpy.var(logAs) RITABAN
            # Fit with g Gaussians
            thisydata = numpy.reshape(
                loggammas - ydata[ii, :], (len(loggammas), nparams)  # subtract mean to fit the error distribution
            )
            # RITABAN : The previous line can be replaced by
            # thisydata= ydata
            # I think
            thisycovar = (
                numpy.zeros((len(loggammas), nparams)) + numpy.var(loggammas) * 10.0 ** -4.0
            )  # regularize RITABAN you can probably leave this
            thisxamp = numpy.ones(options.g) / options.g
            thisxcovar = numpy.ones((options.g, nparams, nparams)) * numpy.var(loggammas)
            thisxmean = (
                numpy.ones((options.g, nparams)) * numpy.mean(loggammas)
                + numpy.std(loggammas) * numpy.random.normal(size=(options.g, nparams)) / 4.0
            )
            # RITABAN : previous two lines should be replaced by something like
            # starting at line 122 (xmean= numpy.zeros((options.k,nparams)))
            # print numpy.mean(loggammas), numpy.std(loggammas)
            extreme_deconvolution(thisydata, thisycovar, thisxamp, thisxmean, thisxcovar)
            ngamp[ii, :] = thisxamp
            ngmean[ii, :, :] = thisxmean
            ngcovar[ii, :, :, :] = thisxcovar
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
        elif type == "DRW":
            print "DRW not supported currently"
            print "Returning ..."
            return
        elif type == "KS11":
            print "type == 'KS11' not implemented yet ..."
            print "Returning ..."
            return
    sys.stdout.write("\r" + _ERASESTR + "\r")
    sys.stdout.flush()
    # Outlier rejection
    # if type == 'powerlawSF':
    #    indx= (ydata[:,0] > -7.21)
    #    ydata= ydata[indx,:]
    #    ycovar= ycovar[indx,:,:]
    # Initial parameters for XD
    print "Running XD ..."
    xamp = numpy.ones(options.k) / float(options.k)
    xmean = numpy.zeros((options.k, nparams))
    for kk in range(options.k):
        xmean[kk, :] = numpy.mean(ydata, axis=0) + numpy.random.normal() * numpy.std(ydata, axis=0) / 4.0
    xcovar = numpy.zeros((options.k, nparams, nparams))
    for kk in range(options.k):
        xcovar[kk, :, :] = numpy.cov(ydata.T) * 2.0
    # Global XD fit using the per-object error mixtures (ng=True)
    ll = extreme_deconvolution(ydata, ycovar, xamp, xmean, xcovar, ng=True, ngamp=ngamp, ngmean=ngmean, ngcovar=ngcovar)
    if True:
        print xamp
        print xmean
        print xcovar
        print ll
    # Prepare for saving
    print "Preparing output for saving ..."
    # Save
    print "Saving ..."
    if os.path.exists(options.outfilename):
        print options.outfilename + " exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    if options.savefits:
        # NOTE(review): everything below the raise is dead code
        raise NotImplementedError("Fits saving not implemented yet")
        import pyfits

        cols = []
        if type == "powerlawSF":
            colA = []
            colg = []
            for kk in range(options.k):
                colA.append(outparams[kk]["logA"])
                colg.append(outparams[kk]["gamma"])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            colw = numpy.log(numpy.array(weights))
            cols.append(pyfits.Column(name="logA", format="E", array=colA))
            cols.append(pyfits.Column(name="gamma", format="E", array=colg))
        elif type == "KS11":
            colA = []
            colg = []
            cols = []
            for kk in range(options.k):
                colA.append(outparams[kk]["logA"])
                colg.append(outparams[kk]["gamma"])
                colg.append(outparams[kk]["s"])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            cols = numpy.array(colg)
            cols.append(pyfits.Column(name="logA", format="E", array=colA))
            cols.append(pyfits.Column(name="gamma", format="E", array=colg))
            cols.append(pyfits.Column(name="s", format="E", array=cols))
        colw = numpy.log(numpy.array(weights))
        cols.append(pyfits.Column(name="logweight", format="E", array=colw))
        columns = pyfits.ColDefs(cols)
        tbhdu = pyfits.new_table(columns)
        tbhdu.writeto(options.outfilename)
    else:
        outfile = open(options.outfilename, "wb")
        pickle.dump(xamp, outfile)
        pickle.dump(xmean, outfile)
        pickle.dump(xcovar, outfile)
        pickle.dump(ll, outfile)
        outfile.close()
    return
Пример #49
0
def xdSamples(parser):
    """Run extreme deconvolution on per-object posterior samples of
    variability-model parameters.

    Reads pickled samples from args[0], summarizes each object's samples
    by their mean and sample covariance (used as the XD measurement
    uncertainty), runs an options.k-component XD fit, and pickles the
    resulting (xamp, xmean, xcovar) to options.outfilename.

    NOTE(review): Python 2 code (print statements).  The builtin 'type'
    is shadowed by the pickled model type; the code after the 'return'
    statements in the powerlawSF/KS11 branches and everything below the
    NotImplementedError in the savefits branch is unreachable.
    """
    (options,args)= parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if options.outfilename is None:
        print "-o filename options needs to be set ..."
        print "Returning ..."
        return None
    numpy.random.seed(seed=options.seed)
    #Restore samples
    savefilename= args[0]
    print "Reading data ..."
    if os.path.exists(savefilename):
        savefile= open(savefilename,'rb')
        samples= pickle.load(savefile)
        type= pickle.load(savefile)
        band= pickle.load(savefile)
        mean= pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    #Prepare samples for XD
    print "Preparing data ..."
    if type == 'powerlawSF':
        if len(band) > 1:
            nparams= 4
        else:
            nparams= 2
    elif type == 'DRW':
        if len(band) == 1:
            nparams= 2
        else:
            print "DRW for multi-band fits not implemented yet ..."
            print "Returning ..."
            return
    elif type == 'KS11':
        nparams= 3
    elif type == 'scatter':
        nparams= 1
    ii= 0
    ndata= len(samples)
    ydata= numpy.zeros((ndata,nparams))
    ycovar= numpy.zeros((ndata,nparams,nparams))
    for key in samples.keys():
        if type == 'powerlawSF':
           #Stack as A,g,Ac,gc
            logAs, loggammas= [], []
            for sample in samples[key]:
                logAs.append(sample['logA'][0])
                loggammas.append(numpy.log(sample['gamma'][0]))
            logAs= numpy.array(logAs)
            loggammas= numpy.array(loggammas)
            # Sample mean as the datum, sample covariance as its uncertainty
            ydata[ii,0]= numpy.mean(logAs)
            ydata[ii,1]= numpy.mean(loggammas)
            ycovar[ii,:,:]= numpy.cov(numpy.vstack((logAs,loggammas)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
                kIn[:,2]= numpy.array([p['logAgr'] for p in params.values()]).reshape(ndata)
                kIn[:,3]= numpy.array([p['gammagr'] for p in params.values()]).reshape(ndata)
        elif type == 'DRW':
           #Stack as loga2, logl
            loga2s, logls= [], []
            for sample in samples[key]:
                loga2s.append(sample['loga2'][0])
                logls.append(sample['logl'][0])
            loga2s= numpy.array(loga2s)
            logls= numpy.array(logls)
            ydata[ii,0]= numpy.mean(loga2s)
            ydata[ii,1]= numpy.mean(logls)
            ycovar[ii,:,:]= numpy.cov(numpy.vstack((loga2s,logls)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
        elif type == 'KS11':
            print "type == 'KS11' not implemented yet ..."
            print "Returning ..."
            return
            #Stack as A,g,s
            kIn[:,0]= numpy.array([p['logA'] for p in params.values()]).reshape(ndata)
            kIn[:,1]= numpy.array([p['gamma'] for p in params.values()]).reshape(ndata)
            kIn[:,2]= numpy.array([p['s'] for p in params.values()]).reshape(ndata)
        ii+= 1
    #Outlier rejection
    #if type == 'powerlawSF':
    #    indx= (ydata[:,0] > -7.21)
    #    ydata= ydata[indx,:]
    #    ycovar= ycovar[indx,:,:]
    #Initial parameters for XD
    print "Running XD ..."
    xamp= numpy.ones(options.k)/float(options.k)
    xmean= numpy.zeros((options.k,nparams))
    for kk in range(options.k):
        xmean[kk,:]= numpy.mean(ydata,axis=0)\
            +numpy.random.normal()*numpy.std(ydata,axis=0)
    xcovar= numpy.zeros((options.k,nparams,nparams))
    for kk in range(options.k):
        xcovar[kk,:,:]= numpy.cov(ydata.T)
    # xamp/xmean/xcovar are updated in place
    extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar)
    #Prepare for saving
    print "Preparing output for saving ..."
    #Save
    print "Saving ..."
    if os.path.exists(options.outfilename):
        print options.outfilename+" exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    if options.savefits:
        # NOTE(review): everything below the raise is dead code
        raise NotImplementedError("Fits saving not implemented yet")
        import pyfits
        cols= []
        if type == 'powerlawSF':
            colA= []
            colg= []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
            colA= numpy.array(colA)
            colg= numpy.array(colg)
            colw= numpy.log(numpy.array(weights))
            cols.append(pyfits.Column(name='logA',format='E',
                                      array=colA))
            cols.append(pyfits.Column(name='gamma',format='E',
                                      array=colg))
        elif type == 'KS11':
            colA= []
            colg= []
            cols= []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
                colg.append(outparams[kk]['s'])
            colA= numpy.array(colA)
            colg= numpy.array(colg)
            cols= numpy.array(colg)
            cols.append(pyfits.Column(name='logA',format='E',
                                      array=colA))
            cols.append(pyfits.Column(name='gamma',format='E',
                                      array=colg))
            cols.append(pyfits.Column(name='s',format='E',
                                      array=cols))
        colw= numpy.log(numpy.array(weights))
        cols.append(pyfits.Column(name='logweight',format='E',
                                  array=colw))
        columns= pyfits.ColDefs(cols)
        tbhdu= pyfits.new_table(columns)
        tbhdu.writeto(options.outfilename)
    else:
        outfile= open(options.outfilename,'wb')
        pickle.dump(xamp,outfile)
        pickle.dump(xmean,outfile)
        pickle.dump(xcovar,outfile)
        outfile.close()
    return
Пример #50
0
    # NOTE(review): this example is a truncated fragment -- the enclosing
    # function and the names ngauss, neurons, dx, ydata, ycovar, weights,
    # new_ndata, gng_time, and bin_time are defined outside this view.
    constant = ngauss / 5
    #print(constant)
    # Uniform amplitudes; note they sum to ngauss/(ngauss-constant) != 1 --
    # presumably extreme_deconvolution renormalizes; confirm
    xamp1 = np.ones(ngauss) / (ngauss - constant)
    xamp2 = np.ones(ngauss) / (ngauss - constant)
    # Initialize means at the first ngauss GNG neuron positions
    xmean = neurons[0:ngauss, :]
    xcovar = np.zeros([ngauss, dx, dx])
    #print(np.shape(xcovar))
    #xcovar = np.cov(neurons.T)
    # Fixed diagonal starting covariances; magic numbers, origin unknown
    for i in range(ngauss):
        xcovar[i][0][0] = 0.00044115
        xcovar[i][1][1] = 0.00074033
        xcovar[i][2][2] = 0.00216775
        xcovar[i][3][3] = 0.0073491

    # Time the deconvolution; arrays are updated in place
    t0 = time.time()
    l = extreme_deconvolution(ydata,
                              ycovar,
                              xamp1,
                              xmean,
                              xcovar,
                              weight=weights)
    t1 = time.time()
    xdc_time = t1 - t0
    filename = "threshold_" + str(new_ndata.shape[0])
    with open(filename, 'w') as filehandle:
        filehandle.write("ndata: " + str(new_ndata.shape[0]) + '\n')
        filehandle.write("ngauss: " + str(ngauss) + '\n')
        filehandle.write("gng: " + str(gng_time) + '\n')
        filehandle.write("binning: " + str(bin_time) + '\n')
        filehandle.write("deconvolution: " + str(xdc_time) + '\n')
Пример #51
0
def xdSamples(parser):
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if options.outfilename is None:
        print "-o filename options needs to be set ..."
        print "Returning ..."
        return None
    numpy.random.seed(seed=options.seed)
    #Restore samples
    savefilename = args[0]
    print "Reading data ..."
    if os.path.exists(savefilename):
        savefile = open(savefilename, 'rb')
        samples = pickle.load(savefile)
        type = pickle.load(savefile)
        band = pickle.load(savefile)
        mean = pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    #Prepare samples for XD
    print "Preparing data ..."
    if type == 'powerlawSF':
        if len(band) > 1:
            nparams = 4
        else:
            nparams = 2
    elif type == 'DRW':
        if len(band) == 1:
            nparams = 2
        else:
            print "DRW for multi-band fits not implemented yet ..."
            print "Returning ..."
            return
    elif type == 'KS11':
        nparams = 3
    elif type == 'scatter':
        nparams = 1
    ii = 0
    ndata = len(samples)
    ydata = numpy.zeros((ndata, nparams))
    ycovar = numpy.zeros((ndata, nparams, nparams))
    for key in samples.keys():
        if type == 'powerlawSF':
            #Stack as A,g,Ac,gc
            logAs, loggammas = [], []
            for sample in samples[key]:
                logAs.append(sample['logA'][0])
                loggammas.append(numpy.log(sample['gamma'][0]))
            logAs = numpy.array(logAs)
            loggammas = numpy.array(loggammas)
            ydata[ii, 0] = numpy.mean(logAs)
            ydata[ii, 1] = numpy.mean(loggammas)
            ycovar[ii, :, :] = numpy.cov(numpy.vstack((logAs, loggammas)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
                kIn[:,
                    2] = numpy.array([p['logAgr']
                                      for p in params.values()]).reshape(ndata)
                kIn[:,
                    3] = numpy.array([p['gammagr']
                                      for p in params.values()]).reshape(ndata)
        elif type == 'DRW':
            #Stack as loga2, logl
            loga2s, logls = [], []
            for sample in samples[key]:
                loga2s.append(sample['loga2'][0])
                logls.append(sample['logl'][0])
            loga2s = numpy.array(loga2s)
            logls = numpy.array(logls)
            ydata[ii, 0] = numpy.mean(loga2s)
            ydata[ii, 1] = numpy.mean(logls)
            ycovar[ii, :, :] = numpy.cov(numpy.vstack((loga2s, logls)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
        elif type == 'KS11':
            print "type == 'KS11' not implemented yet ..."
            print "Returning ..."
            return
            #Stack as A,g,s
            kIn[:, 0] = numpy.array([p['logA']
                                     for p in params.values()]).reshape(ndata)
            kIn[:, 1] = numpy.array([p['gamma']
                                     for p in params.values()]).reshape(ndata)
            kIn[:, 2] = numpy.array([p['s']
                                     for p in params.values()]).reshape(ndata)
        ii += 1
    #Outlier rejection
    #if type == 'powerlawSF':
    #    indx= (ydata[:,0] > -7.21)
    #    ydata= ydata[indx,:]
    #    ycovar= ycovar[indx,:,:]
    #Initial parameters for XD
    print "Running XD ..."
    xamp = numpy.ones(options.k) / float(options.k)
    xmean = numpy.zeros((options.k, nparams))
    for kk in range(options.k):
        xmean[kk,:]= numpy.mean(ydata,axis=0)\
            +numpy.random.normal()*numpy.std(ydata,axis=0)
    xcovar = numpy.zeros((options.k, nparams, nparams))
    for kk in range(options.k):
        xcovar[kk, :, :] = numpy.cov(ydata.T)
    extreme_deconvolution(ydata, ycovar, xamp, xmean, xcovar)
    #Prepare for saving
    print "Preparing output for saving ..."
    #Save
    print "Saving ..."
    if os.path.exists(options.outfilename):
        print options.outfilename + " exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    if options.savefits:
        raise NotImplementedError("Fits saving not implemented yet")
        import pyfits
        cols = []
        if type == 'powerlawSF':
            colA = []
            colg = []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            colw = numpy.log(numpy.array(weights))
            cols.append(pyfits.Column(name='logA', format='E', array=colA))
            cols.append(pyfits.Column(name='gamma', format='E', array=colg))
        elif type == 'KS11':
            colA = []
            colg = []
            cols = []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
                colg.append(outparams[kk]['s'])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            cols = numpy.array(colg)
            cols.append(pyfits.Column(name='logA', format='E', array=colA))
            cols.append(pyfits.Column(name='gamma', format='E', array=colg))
            cols.append(pyfits.Column(name='s', format='E', array=cols))
        colw = numpy.log(numpy.array(weights))
        cols.append(pyfits.Column(name='logweight', format='E', array=colw))
        columns = pyfits.ColDefs(cols)
        tbhdu = pyfits.new_table(columns)
        tbhdu.writeto(options.outfilename)
    else:
        outfile = open(options.outfilename, 'wb')
        pickle.dump(xamp, outfile)
        pickle.dump(xmean, outfile)
        pickle.dump(xcovar, outfile)
        outfile.close()
    return