def test_single_gauss_1d_varunc_logweights():
    """Recover mean/variance of a single 1D Gaussian using log-weights.

    The positive tail is oversampled and then down-weighted by a factor 2
    (weight 0.5), so XD with logweight=True should recover the original
    zero-mean, unit-variance Gaussian.

    NOTE(review): a function with this exact name is defined again later in
    this file and will shadow this one at import time -- confirm which copy
    is intended.
    """
    # Generate data from a single Gaussian, recover mean and variance, with weights
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    # twice oversample > 0
    # FIX: use ndata instead of the hard-coded 3001 so the sample size is
    # defined in exactly one place
    ydata[numpy.arange(ndata) > 2000] = \
        numpy.fabs(ydata[numpy.arange(ndata) > 2000])
    weight = numpy.ones(ndata)
    weight[ydata[:, 0] > 0] = 0.5
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD (updates initamp/initmean/initcovar in place)
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          weight=numpy.log(weight), logweight=True)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    return None
def test_single_gauss_2d_nounc():
    # Single 2D Gaussian with no measurement uncertainties: XD should
    # recover the generating mean (1, 2) and identity covariance.
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    ycovar = numpy.zeros_like(ydata)
    # Start the fit away from the truth
    initamp = numpy.ones(1)
    initmean = numpy.atleast_2d(
        numpy.mean(ydata, axis=0) + numpy.std(ydata, axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # extreme_deconvolution updates the init* arrays in place
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # Compare against the generating parameters
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    return None
def XDapogee(options, args):
    """Fit an XD Gaussian mixture to APOGEE (log vc, dvc/dr) MCMC samples.

    Parameters
    ----------
    options : object with attributes ``g`` (number of Gaussians) and
        ``plotfile`` (output pickle filename)
    args : sequence whose first element is the pickle file holding the chains

    Side effects: prints summary statistics and writes the fitted mixture
    via ``save_pickles``.
    """
    # First load the chains (FIX: context manager closes the file even on error)
    with open(args[0], 'rb') as savefile:
        thesesamples = pickle.load(savefile)
    vcs = numpy.array([s[0] for s in thesesamples]) * _APOGEEREFV0 / _REFV0
    # To be consistent with this project's dlnvcdlnr
    dvcdrs = numpy.array([s[6] for s in thesesamples]) * 30.
    # FIX: Python-2 print statements converted to print() calls
    print(numpy.mean(vcs))
    print(numpy.mean(dvcdrs))
    # Now fit XD to the 2D PDFs
    ydata = numpy.zeros((len(vcs), 2))
    ycovar = numpy.zeros((len(vcs), 2))  # zero (diagonal) uncertainties
    ydata[:, 0] = numpy.log(vcs)
    ydata[:, 1] = dvcdrs
    vcxamp = numpy.ones(options.g) / options.g
    vcxmean = numpy.zeros((options.g, 2))
    vcxcovar = numpy.zeros((options.g, 2, 2))
    for ii in range(options.g):
        # Randomly perturbed means; diagonal data-variance covariances
        vcxmean[ii, :] = numpy.mean(ydata, axis=0) \
            + numpy.std(ydata, axis=0) * numpy.random.normal(size=(2)) / 4.
        vcxcovar[ii, 0, 0] = numpy.var(ydata[:, 0])
        vcxcovar[ii, 1, 1] = numpy.var(ydata[:, 1])
    extreme_deconvolution.extreme_deconvolution(ydata, ycovar,
                                                vcxamp, vcxmean, vcxcovar)
    save_pickles(options.plotfile, vcxamp, vcxmean, vcxcovar)
    print(vcxamp)
    print(vcxmean[:, 0])
    print(vcxmean[:, 1])
    return None
def test_single_gauss_2d_offdiagunc():
    # Single 2D Gaussian observed with full 2x2 per-point covariance
    # matrices (diagonal-valued here).
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    tycovar = numpy.ones_like(ydata) \
        * numpy.random.uniform(size=(ndata, 2)) / 2.
    ydata += numpy.random.normal(size=(ndata, 2)) * numpy.sqrt(tycovar)
    # Promote the per-point variances to full 2x2 covariance matrices
    ycovar = numpy.array([numpy.diag(row) for row in tycovar])
    # Initialize the fit away from the truth
    initamp = numpy.ones(1)
    initmean = numpy.atleast_2d(
        numpy.mean(ydata, axis=0) + numpy.std(ydata, axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # extreme_deconvolution updates the init* arrays in place
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    return None
def test_fixmean_single_gauss_1d_nounc():
    # Single 1D Gaussian with the mean held fixed at its true value (1.),
    # so only the variance is fit.
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T + 1.
    ycovar = numpy.zeros_like(ydata)
    # Initialize: mean at the truth, variance deliberately too large
    initamp = numpy.ones(1)
    initmean = numpy.array([[1.]])
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixmean=True)
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 1.) < 10.**-10., \
        'XD did not fixmean for single Gaussian'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/o uncertainties, fixing mean'
    return None
def test_single_gauss_2d_diagunc_proj():
    # Single 2D Gaussian observed through random 1D projections
    # (x, y, or x+y per point) with heteroskedastic uncertainties.
    ndata = 3001
    tydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    # Randomly assign each point one of three projections
    which = numpy.random.binomial(2, 0.5, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[which == 0, 0] = tydata[which == 0, 0]
    ydata[which == 1, 0] = tydata[which == 1, 1]
    ydata[which == 2, 0] = tydata[which == 2, 0] + tydata[which == 2, 1]
    projection = numpy.empty((ndata, 1, 2))
    projection[which == 0] = numpy.array([[[1., 0.]]])
    projection[which == 1] = numpy.array([[[0., 1.]]])
    projection[which == 2] = numpy.array([[[1., 1.]]])
    # Add per-point Gaussian noise with uniform random variances
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # Initialize the fit away from the truth
    initamp = numpy.ones(1)
    initmean = numpy.array([[0., 1.]])
    initcovar = numpy.array([[[2., -1.], [-1., 3.]]])
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          projection=projection)
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    return None
def test_fixamp_alt_dual_gauss_1d_nounc():
    # Two 1D Gaussians, no uncertainties; amplitudes held fixed at the
    # truth via the list form fixamp=[True, False].
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit
    K = 2
    initamp = numpy.array([amp_true, 1. - amp_true])
    initmean = numpy.array([[-1.], [2.]])
    numpy.random.uniform()  # hack to get diff init
    initcovar = numpy.full((K, 1, 1), numpy.mean(3. * numpy.var(ydata)))
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixamp=[True, False])  # should be same as =True
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    return None
def test_single_gauss_1d_varunc_log_loglikeonly():
    # Fit with a logfile, then verify that likeonly=True reproduces the
    # final log-likelihood recorded in the logfile.
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # Initialize the fit away from the truth
    initamp = numpy.ones(1)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now compute the likelihood and check that it is the same as in the logfile
    lnl = extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                                likeonly=True)
    with open(logfile + '_loglike.log') as log:
        lines = log.readlines()
    # lines[-3] holds the last recorded log-likelihood value
    assert numpy.fabs(float(lines[-3]) - lnl) < 10.**-6., \
        'loglike computed using likeonly is not the same as in the logfile'
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def test_fixcovar_dual_gauss_1d_nounc():
    # Two 1D Gaussians, no uncertainties; covariances held fixed at their
    # true values (1 and 4), so only amps and means are fit.
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [2.]])
    # Covariances start (and stay) at the generating values
    initcovar = numpy.array([[[1.]], [[4.]]])
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixcovar=True)
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing covar'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover mean for dual Gaussian w/o uncertainties, fixing covar'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing mean'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing covar'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover mean for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 10.**-10., \
        'XD did not fixcovar for dual Gaussian w/o uncertainties'
    return None
def test_single_gauss_1d_varunc_log():
    """Fit a 1D Gaussian with uncertainties and check the logfiles are written."""
    # Same as in test_oned, but now also log
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile + '_c.log'), \
        'XD did not produce _c.log logfile when asked'
    # FIX: close the log files deterministically; the original counted lines
    # over a bare open() inside a genexp, leaving the handle to the GC
    with open(logfile + '_c.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile + '_loglike.log'), \
        'XD did not produce _loglike.log logfile when asked'
    with open(logfile + '_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def test_dual_gauss_1d_constunc():
    # Two 1D Gaussians observed with a constant measurement variance 0.25.
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.ones_like(ydata) * 0.25
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # initialize fit
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.]])
    initcovar = numpy.full((K, 1, 1), numpy.mean(3. * numpy.var(ydata)))
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    tol = 20. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover correct amp for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/ constant uncertainties'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover correct amp for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/ constant uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/ constant uncertainties'
    return None
def test_single_gauss_2d_diagunc_logfile():
    """Fit a 2D Gaussian with diagonal uncertainties; check logfiles are written.

    NOTE(review): a function with this exact name appears again later in the
    file -- confirm which copy is intended.
    """
    # Generate data from a single Gaussian, recover mean and variance
    # Also log
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    ycovar = numpy.ones_like(ydata) \
        * numpy.random.uniform(size=(ndata, 2)) / 2.
    ydata += numpy.random.normal(size=(ndata, 2)) * numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata, axis=0)
                                + numpy.std(ydata, axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that the fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile + '_c.log'), \
        'XD did not produce _c.log logfile when asked'
    # FIX: close the log files deterministically; the original counted lines
    # over a bare open() inside a genexp, leaving the handle to the GC
    with open(logfile + '_c.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile + '_loglike.log'), \
        'XD did not produce _loglike.log logfile when asked'
    with open(logfile + '_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def test_single_gauss_1d_varunc_logweights():
    """Recover mean/variance of a single 1D Gaussian using log-weights.

    NOTE(review): this duplicates the name of an earlier function in this
    file and shadows it at import time -- confirm which copy is intended.
    """
    # Generate data from a single Gaussian, recover mean and variance, with weights
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    # twice oversample > 0
    # FIX: use ndata instead of the hard-coded 3001 so the sample size is
    # defined in exactly one place
    ydata[numpy.arange(ndata) > 2000] = \
        numpy.fabs(ydata[numpy.arange(ndata) > 2000])
    weight = numpy.ones(ndata)
    weight[ydata[:, 0] > 0] = 0.5
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          weight=numpy.log(weight), logweight=True)
    # Test
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    return None
def test_triple_gauss_1d_varunc_alsow():
    # Three 1D Gaussians with heteroskedastic uncertainties; a small
    # regularization w=0.1 should not change the recovered parameters much.
    ndata = 3001
    amp_true = [0.3, 0.1, 0.6]
    assign = numpy.random.choice(numpy.arange(3), p=amp_true, size=ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 4.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ydata[assign == 2, 0] = numpy.random.normal(size=numpy.sum(assign == 2)) * 1.5 + 8.
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # initialize fit
    K = 3
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.], [1.]])
    initcovar = numpy.full((K, 1, 1), numpy.mean(3. * numpy.var(ydata)))
    # Run XD, w shouldn't make much difference
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar, w=0.1)
    tol = 25. / numpy.sqrt(ndata)
    first = initamp > 0.5
    assert numpy.fabs(initamp[first] - amp_true[2]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[first] - 8.) < tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[first] - 1.5**2.) < tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    second = (initamp <= 0.5) * (initamp > 0.2)
    assert numpy.fabs(initamp[second] - amp_true[0]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[second] - -4.) < 2. * tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[second] - 1.) < 2. * tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    third = (initamp <= 0.2)
    assert numpy.fabs(initamp[third] - amp_true[1]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[third] - 1.) < 4. * tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[third] - 4.) < 4. * tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    return None
def fit_gaia_baseline(datafile, output_prefix, K, epochs, w_reg, k_means_iters):
    """Fit a K-component XD baseline to the Gaia training set and report scores.

    Parameters
    ----------
    datafile : path to an .npz archive with X_train/C_train/X_val/C_val arrays
    output_prefix : path prefix (str or Path) for the log, results, and params
    K : number of mixture components
    epochs : maximum number of XD EM iterations (passed as maxiter)
    w_reg : covariance regularization passed to extreme_deconvolution
    k_means_iters : max iterations for the minibatch k-means initializer

    Side effects: writes '<prefix>_log*', '<prefix>_results.json' and
    '<prefix>_params.npz'.
    """
    data = np.load(datafile)
    train_data = SGDDeconvDataset(torch.Tensor(data['X_train']),
                                  torch.Tensor(data['C_train']))
    loader = data_utils.DataLoader(train_data,
                                   batch_size=5000,
                                   num_workers=4,
                                   shuffle=True)
    start_time = time.time()
    # Initialize means from minibatch k-means, identity covariances.
    # FIX: max_iters was hard-coded to 10, silently ignoring k_means_iters.
    counts, centroids = minibatch_k_means(loader, k=K, max_iters=k_means_iters)
    weights = (counts / counts.sum()).numpy()
    means = centroids.numpy()
    covars = np.array(K * [np.eye(7)])
    ll = extreme_deconvolution(data['X_train'], data['C_train'],
                               weights, means, covars,
                               w=w_reg,
                               logfile=str(output_prefix) + '_log',
                               maxiter=epochs)
    end_time = time.time()
    # ll is presumably the average log-likelihood per point -- scaled up to a
    # total by the number of points (TODO confirm against the library docs)
    train_score = ll * data['X_train'].shape[0]
    val_ll = extreme_deconvolution(data['X_val'], data['C_val'],
                                   weights, means, covars,
                                   w=w_reg,
                                   likeonly=True)
    val_score = val_ll * data['X_val'].shape[0]
    print('Training score: {}'.format(train_score))
    print('Val score: {}'.format(val_score))
    results = {
        'start_time': start_time,
        'end_time': end_time,
        'train_score': train_score,
        'val_score': val_score,
    }
    # FIX: close the results file deterministically (was a bare open())
    with open(str(output_prefix) + '_results.json', mode='w') as results_file:
        json.dump(results, results_file)
    # FIX: use str(output_prefix) consistently; plain concatenation raises
    # TypeError when output_prefix is a pathlib.Path
    np.savez(str(output_prefix) + '_params.npz',
             weights=weights, means=means, covar=covars)
def _xdFit(X, XErr, nGauss, n_iter=10):
    # Seed the mixture with a plain GMM fit, refine it with extreme
    # deconvolution (in-place on amp/mean/covar), and wrap it in an XDGMM.
    gmm = GMM(nGauss, n_iter=n_iter, covariance_type='full').fit(X)
    amp, mean, covar = gmm.weights_, gmm.means_, gmm.covars_
    xd.extreme_deconvolution(X, XErr, amp, mean, covar)
    clf = XDGMM(nGauss)
    clf.alpha = amp
    clf.mu = mean
    clf.V = covar
    return clf
def test_single_gauss_2d_diagunc_logfile():
    """Fit a 2D Gaussian with diagonal uncertainties; check logfiles are written.

    NOTE(review): this duplicates the name of an earlier function in this
    file and shadows it at import time -- confirm which copy is intended.
    """
    # Generate data from a single Gaussian, recover mean and variance
    # Also log
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    ycovar = numpy.ones_like(ydata) \
        * numpy.random.uniform(size=(ndata, 2)) / 2.
    ydata += numpy.random.normal(size=(ndata, 2)) * numpy.sqrt(ycovar)
    # initialize fit
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata, axis=0)
                                + numpy.std(ydata, axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that the fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile + '_c.log'), \
        'XD did not produce _c.log logfile when asked'
    # FIX: close the log files deterministically; the original counted lines
    # over a bare open() inside a genexp, leaving the handle to the GC
    with open(logfile + '_c.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile + '_loglike.log'), \
        'XD did not produce _loglike.log logfile when asked'
    with open(logfile + '_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def test_fixmean_fixone_dual_gauss_1d_nounc():
    # Two 1D Gaussians, no uncertainties; only the first component's mean
    # is held fixed (fixmean=[True, False]).
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit: first mean at the truth, second deliberately off
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-2.], [1.5]])
    initcovar = numpy.full((K, 1, 1), numpy.mean(3. * numpy.var(ydata)))
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixmean=[True, False])
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing one mean'
    assert numpy.fabs(initmean[first] - -2.) < 10.**-10., \
        'XD did not fixmean for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing one mean'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover amp for dual Gaussian w/o uncertainties, fixing one mean'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover mean for dual Gaussian w/o uncertainties, fixing one mean'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing one mean'
    return None
def XD_ND_Ncomp(data, covar, n_components: int = 3, init_guess='default',
                print_init=False, plot=True):
    """
    Input:
        data: (ndata, ndim)
        covar: (ndata, ndim, ndim)
        n_components: number of components to fit
        init_guess: 'default' to seed from a GMM, otherwise an
            (initmean, initcovar) pair
        print_init: print the initialization and array shapes
        plot: scatter the data and fitted component means
    Returns: (initamp, initmean, initcovar) as updated in place by XD
    """
    ### initialize fit with GMM
    K = n_components
    initamp = np.ones(K) / float(K)
    if init_guess == 'default':
        initmean, initcovar = initial_guess_from_GMM(data, n_components)
    else:
        print('manual init')
        initmean, initcovar = init_guess
    if print_init:
        print('initial')
        print('initamp: ', initamp)
        print('initmean: ', initmean)
        print('initcovar: ', initcovar)
        print()
        # FIX: these debug prints referenced undefined names ydata/ycovar
        # (NameError when print_init=True); the parameters are data/covar
        print('ydata.shape: ', data.shape)
        print('ycovar.shape: ', covar.shape)
        print('initamp.shape: ', initamp.shape)
        print('initmean.shape: ', initmean.shape)
        print('initcovar.shape: ', initcovar.shape)
        print()
    # Running XD (updates initamp/initmean/initcovar in place)
    extreme_deconvolution(data, covar, initamp, initmean, initcovar,
                          maxsnm=True)
    print('XD - fit')
    print('amp: ', initamp)
    print('mean: ', initmean)
    print('cov:', initcovar)
    if plot:
        # Plotting the results
        plt.scatter(data[:, 0], data[:, 1])
        for comp in initmean:
            plt.scatter(*comp, c='r')
    return initamp, initmean, initcovar
def run_xd(dafe):
    """Run XD on the delta afes"""
    npts = len(dafe)
    ydata = numpy.empty((npts, 1))
    ycovar = numpy.zeros((npts, 1))
    ydata[:, 0] = dafe
    # Two-component initialization; the second component's mean is held
    # fixed at -0.12 during the fit
    xamp = numpy.array([0.5, 0.5])
    xmean = numpy.array([[0.], [-0.12]])
    xcovar = numpy.array([[[0.07]], [[0.07]]])
    extreme_deconvolution(ydata, ycovar, xamp, xmean, xcovar,
                          fixmean=[False, True])
    return (xamp, xmean, xcovar)
def test_fixamp_alt2_dual_gauss_1d_nounc():
    # Two 1D Gaussians, no uncertainties; amplitudes fixed via the index
    # form fixamp=[1] (should behave like fixamp=True).
    ndata = 3001
    amp_true = 0.3
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ycovar = numpy.zeros_like(ydata)
    # initialize fit
    K = 2
    initamp = numpy.array([amp_true, 1. - amp_true])
    initmean = numpy.array([[-1.], [2.]])
    numpy.random.uniform()  # hack to get diff init
    initcovar = numpy.full((K, 1, 1), numpy.mean(3. * numpy.var(ydata)))
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          fixamp=[1])  # should be same as =True
    tol = 12. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < 10.**-10., \
        'XD did not fixamp for dual Gaussian w/o uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/o uncertainties, fixing amp'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/o uncertainties, fixing amp'
    return None
def test_triple_gauss_1d_varunc_snm_log():
    """Fit three 1D Gaussians with maxsnm=True and a logfile; check both."""
    # Like in oned, but also log
    ndata = 3001
    amp_true = [0.1, 0.3, 0.6]
    assign = numpy.random.choice(numpy.arange(3), p=amp_true, size=ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 4.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ydata[assign == 2, 0] = numpy.random.normal(size=numpy.sum(assign == 2)) * 1.5 + 8.
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # initialize fit
    K = 3
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.], [1.]])
    initcovar = numpy.zeros((K, 1, 1))
    for kk in range(K):
        initcovar[kk] = numpy.mean(3. * numpy.var(ydata))
    # Run XD
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          maxsnm=True, logfile=logfile)
    # Test
    tol = 25. / numpy.sqrt(ndata)
    first = initamp > 0.5
    assert numpy.fabs(initamp[first] - amp_true[2]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[first] - 8.) < tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[first] - 1.5**2.) < tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    second = (initamp <= 0.5) * (initamp > 0.2)
    assert numpy.fabs(initamp[second] - amp_true[0]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 4. * tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 4. * tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    third = (initamp <= 0.2)
    assert numpy.fabs(initamp[third] - amp_true[1]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[third] - -4.) < 2. * tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[third] - 1.) < 2. * tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile + '_c.log'), \
        'XD did not produce _c.log logfile when asked'
    # FIX: close the log files deterministically; the original counted lines
    # over a bare open() inside a genexp, leaving the handle to the GC
    with open(logfile + '_c.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile + '_loglike.log'), \
        'XD did not produce _loglike.log logfile when asked'
    with open(logfile + '_loglike.log') as logf:
        num_lines = sum(1 for line in logf)
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def test_single_gauss_2d_diagunc_proj():
    # Single 2D Gaussian observed through random 1D projections
    # (x, y, or x+y per point) with heteroskedastic uncertainties.
    # NOTE(review): duplicates an earlier function name in this file.
    ndata = 3001
    truth = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    # Randomly assign each point one of three projections
    which = numpy.random.binomial(2, 0.5, ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[which == 0, 0] = truth[which == 0, 0]
    ydata[which == 1, 0] = truth[which == 1, 1]
    ydata[which == 2, 0] = truth[which == 2, 0] + truth[which == 2, 1]
    projection = numpy.empty((ndata, 1, 2))
    projection[which == 0] = numpy.array([[[1., 0.]]])
    projection[which == 1] = numpy.array([[[0., 1.]]])
    projection[which == 2] = numpy.array([[[1., 1.]]])
    # Add per-point Gaussian noise with uniform random variances
    ycovar = numpy.ones_like(ydata) \
        * numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata += numpy.atleast_2d(numpy.random.normal(size=ndata)).T \
        * numpy.sqrt(ycovar)
    # Initialize the fit away from the truth
    initamp = numpy.ones(1)
    initmean = numpy.array([[0., 1.]])
    initcovar = numpy.array([[[2., -1.], [-1., 3.]]])
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          projection=projection)
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    return None
def score_baseline(datafile, results_dir, output_file):
    """Score saved baseline XD fits on held-out test data.

    Loads every ``baseline_512*.npz`` parameter file in *results_dir*,
    evaluates the XD log-likelihood of the test set in *datafile* for each
    (``likeonly=True`` so no re-fitting happens), prints the mean +- std of
    the scores, and dumps the raw scores to *output_file* as JSON.
    """
    data = np.load(datafile)
    param_files = [f for f in os.listdir(results_dir)
                   if f.startswith('baseline_512') and f.endswith('.npz')]
    scores = []
    for p in param_files:
        # os.path.join is robust to a missing trailing separator in
        # results_dir (the original string concatenation was not)
        params = np.load(os.path.join(results_dir, p))
        weights = params['weights']
        means = params['means']
        covars = params['covar']
        # likeonly=True: evaluate the log-likelihood only, no EM updates
        test_score = extreme_deconvolution(data['X_test'], data['C_test'],
                                           weights, means, covars,
                                           likeonly=True)
        print(test_score)
        scores.append(test_score)
    print('Test Score: {} +- {}'.format(np.mean(scores), np.std(scores)))
    # Context manager closes the file (the original leaked the handle);
    # float() makes any numpy scalars JSON-serializable
    with open(output_file, 'w') as outf:
        json.dump([float(s) for s in scores], outf)
def test1_ngerrors():
    # Exercise XD's non-Gaussian per-point error model (ng=True) on 2D data.
    # NOTE: Python 2 script (print statements).
    #Generate data
    ndata= 10001
    ngauss= 1
    # Per-dimension scales 1 and 2, then inflated by sqrt(2)
    ydata= numpy.random.normal(scale=[1.,2.],size=(ndata,2))*numpy.sqrt(2.)
    ycovar= numpy.ones((ndata,2))*0.
    # Per-point error mixture: ngauss equal-weight, zero-mean components
    ngamp= numpy.ones((ndata,ngauss))/ngauss
    ngmean= numpy.zeros((ndata,ngauss,2))
    ngcovar= numpy.ones((ndata,ngauss,2))
    # One-component starting guess (overwritten by the block below)
    xamp= numpy.ones(1)/1.
    xmean= numpy.array([[0.,0.]])
    xcovar= numpy.array([[[ 0.03821028, 0.02108113],
                          [ 0.02108113, 0.03173839]]])
    #"""
    # Two-component starting guess -- ACTIVE, because the triple-quote
    # markers above and below are commented out
    xamp= numpy.ones(2)/2.
    xmean= numpy.array([[0.,0.],[1.,-1.]])
    xcovar= numpy.array([[[ 0.03821028, 0.02108113],
                          [ 0.02014796, 0.03173839]],
                         [[ 0.06219194, 0.02302473],
                          [ 0.02738021, 0.06778009]]])
    #"""
    # Run XD with the non-Gaussian error model; returns the log-likelihood
    l= extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar,
                             ng=True,
                             ngamp=ngamp,
                             ngmean=ngmean,
                             ngcovar=ngcovar)
    print l
    print xamp, xmean, xcovar
def test_single_gauss_2d_offdiagunc_proj():
    # Generate data from a single 2D Gaussian and recover its mean and
    # variance with XD when half the points are sheared (x -> x+y) and the
    # noise is given as full (here diagonal) covariance matrices.
    ndata = 3001
    ydata = numpy.random.normal(size=(ndata, 2)) + numpy.array([[1., 2.]])
    # For half of the points, x -> x+y
    proj = numpy.random.uniform(size=ndata) > 0.5
    ydata[proj, 0] = ydata[proj, 0] + ydata[proj, 1]
    projection = numpy.empty((ndata, 2, 2))
    projection[proj] = numpy.array([[1., 1.], [0., 1.]])
    # True ^ proj flips the boolean mask (equivalent to ~proj)
    projection[True ^ proj] = numpy.array([[1., 0.], [0., 1.]])
    # Heteroskedastic per-dimension noise in the projected space
    tycovar= numpy.ones_like(ydata)\
        *numpy.random.uniform(size=(ndata,2))/2.
    ydata += numpy.random.normal(size=(ndata, 2)) * numpy.sqrt(tycovar)
    # Promote the per-dimension variances to full 2x2 covariance matrices
    ycovar = numpy.empty((ndata, 2, 2))
    for ii in range(ndata):
        ycovar[ii] = numpy.diag(tycovar[ii])
    # initialize fit: single component seeded from the (biased) sample stats
    K = 1
    initamp = numpy.ones(K)
    initmean= numpy.atleast_2d(numpy.mean(ydata,axis=0)\
        +numpy.std(ydata,axis=0))
    initcovar = numpy.atleast_3d(numpy.cov(ydata, rowvar=False)).T
    # Run XD; initial parameters are updated in place
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          projection=projection)
    # Test recovery of the true mean and unit diagonal covariance
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean[0, 0] - 1.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initmean[0, 1] - 2.) < tol, \
        'XD does not recover correct mean for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 0] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 1, 1] - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    assert numpy.fabs(initcovar[0, 0, 1] - 0.) < tol, \
        'XD does not recover correct variance for single Gaussian in 2D w/o uncertainties'
    return None
def test_single_gauss_1d_nounc():
    """Fit a single component to noiseless draws from N(0, 1) and check
    that XD recovers the true mean and variance in place."""
    npts = 3001
    obs = numpy.random.normal(size=npts)[:, numpy.newaxis]
    obs_var = numpy.zeros_like(obs)
    # Deliberately biased single-component starting guesses
    start_amp = numpy.ones(1)
    start_mean = numpy.atleast_2d(obs.mean() + 1.)
    start_covar = numpy.atleast_3d(3. * obs.var())
    # XD refines the starting parameters in place
    extreme_deconvolution(obs, obs_var, start_amp, start_mean, start_covar)
    prec = 10. / numpy.sqrt(npts)
    assert numpy.fabs(start_mean - 0.) < prec, \
        'XD does not recover correct mean for single Gaussian w/o uncertainties'
    assert numpy.fabs(start_covar - 1.) < prec, \
        'XD does not recover correct variance for single Gaussian w/o uncertainties'
    return None
def TryModel(nGaussiansStar, nGaussiansGalaxy):
    # Fit star and galaxy training sets with a GMM warm start refined by XD,
    # build XDGMM classifiers, and score star/galaxy predictions.
    # NOTE(review): Python 2 code relying on module-level globals
    # (XTrainStar, XErrTrainStar, XTestStar, starTestNumber, predictStar,
    # generateReport, ...) -- confirm they are defined before calling.
    print 'Star Gaussians: {0}'.format(nGaussiansStar)
    print 'Galaxy Gaussians: {0}'.format(nGaussiansGalaxy)
    #convolving
    print 'Estimating Gaussians'
    # Error-free GMM fits provide the XD starting points
    GMMStar = GMM(nGaussiansStar, n_iter = 10, covariance_type='full').fit(XTrainStar)
    GMMGalaxy = GMM(nGaussiansGalaxy, n_iter=10, covariance_type='full').fit(XTrainGalaxy)
    ampstar = GMMStar.weights_
    meanstar = GMMStar.means_
    covarstar = GMMStar.covars_
    ampgalaxy = GMMGalaxy.weights_
    meangalaxy = GMMGalaxy.means_
    covargalaxy = GMMGalaxy.covars_
    # Results are saved in `amp`, `mean`, and `covar` (XD mutates in place)
    print 'Deconvolving star'
    xd.extreme_deconvolution(XTrainStar, XErrTrainStar, ampstar, meanstar, covarstar)
    clfstar = XDGMM(nGaussiansStar)
    clfstar.alpha = ampstar
    clfstar.mu = meanstar
    clfstar.V = covarstar
    print 'Deconvolving galaxies'
    xd.extreme_deconvolution(XTrainGalaxy, XErrTrainGalaxy, ampgalaxy, meangalaxy, covargalaxy)
    clfgalaxy = XDGMM(nGaussiansGalaxy)
    clfgalaxy.alpha = ampgalaxy
    clfgalaxy.mu = meangalaxy
    clfgalaxy.V = covargalaxy
    print 'Predicting'
    # need to pass XTestStar[i] and XTestGalaxy[i] as np.array([XTestStar[i]]) because internally it assumes 2D matrix
    starPredictions = np.array([predictStar(clfstar, clfgalaxy, np.array([XTestStar[i]]), np.array([XErrTestStar[i]]), i) for i in range(starTestNumber)])
    galaxyPredictions = np.array([predictStar(clfstar, clfgalaxy, np.array([XTestGalaxy[i]]), np.array([XErrTestGalaxy[i]]), i) for i in range(galaxyTestNumber)])
    predictions = np.array(starPredictions.tolist() + galaxyPredictions.tolist())
    # Ground-truth labels: 1 = star, 0 = galaxy
    results = np.array([1 for i in range(len(starPredictions))] + [0 for i in range(len(galaxyPredictions))])
    report = generateReport(predictions, results)
    return (report['Precision'], report['Recall'], clfstar, clfgalaxy)
def test_dual_gauss_1d_varunc():
    # Generate data from a two-Gaussian mixture with heteroskedastic noise
    # and recover amplitudes, means, and variances with XD.
    ndata = 3001
    amp_true = 0.3  # weight of the N(-2, 1) component
    assign = numpy.random.binomial(1, 1. - amp_true, ndata)
    ydata = numpy.zeros((ndata, 1))
    # Component 0: N(-2, 1); component 1: N(1, 4)
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 2.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    # Heteroskedastic measurement noise, variance declared in ycovar
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit: equal amplitudes, broad variances
    K = 2
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.]])
    initcovar = numpy.zeros((K, 1, 1))
    for kk in range(K):
        initcovar[kk] = numpy.mean(3. * numpy.var(ydata))
    # Run XD; parameters are updated in place
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar)
    # Test: identify the components by their fitted amplitude
    tol = 20. / numpy.sqrt(ndata)
    first = initamp < 0.5
    assert numpy.fabs(initamp[first] - amp_true) < tol, \
        'XD does not recover correct amp for dual Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[first] - -2.) < tol, \
        'XD does not recover correct mean for dual Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[first] - 1.) < tol, \
        'XD does not recover correct variance for dual Gaussian w/ uncertainties'
    second = initamp >= 0.5
    assert numpy.fabs(initamp[second] - (1. - amp_true)) < tol, \
        'XD does not recover correct amp for dual Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 2. * tol, \
        'XD does not recover correct mean for dual Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 2. * tol, \
        'XD does not recover correct variance for dual Gaussian w/ uncertainties'
    return None
def test_fixmean_single_gauss_1d_nounc():
    """Check that fixmean=True keeps the mean pinned while XD still recovers
    the variance of a single Gaussian from noiseless data."""
    npts = 3001
    obs = numpy.random.normal(size=npts)[:, numpy.newaxis] + 1.
    obs_var = numpy.zeros_like(obs)
    start_amp = numpy.ones(1)
    start_mean = numpy.array([[1.]])  # held fixed by fixmean=True
    start_covar = numpy.atleast_3d(3. * obs.var())
    extreme_deconvolution(obs, obs_var, start_amp, start_mean, start_covar,
                          fixmean=True)
    prec = 10. / numpy.sqrt(npts)
    # The mean must be untouched to machine precision
    assert numpy.fabs(start_mean - 1.) < 10.**-10., \
        'XD did not fixmean for single Gaussian'
    assert numpy.fabs(start_covar - 1.) < prec, \
        'XD does not recover correct variance for single Gaussian w/o uncertainties, fixing mean'
    return None
def test_single_gauss_1d_varunc_log_loglikeonly():
    # Same as in test_oned, but now also log; afterwards verify that
    # likeonly=True reproduces the final log-likelihood from the logfile.
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    # Heteroskedastic noise with variance declared in ycovar
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit: single component, deliberately offset start
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD with logging enabled
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now compute the likelihood and check that it is the same as in the logfile
    lnl = extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                                likeonly=True)
    with open(logfile + '_loglike.log') as log:
        lines = log.readlines()
    # The converged log-likelihood is the third line from the end of the log
    assert numpy.fabs(float(lines[-3]) - lnl) < 10.**-6., \
        'loglike computed using likeonly is not the same as in the logfile'
    # Clean up the logfiles
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def measure_kinematics_onepop(tgas,twomass,jk,dm,mj,spii,zbins,options,
                              csvwriter,csvout,maxcovar=30.):
    # Measure the vertical kinematics (sigma_z^2 and kurtosis) of one
    # stellar population in bins of height z, writing one CSV row per bin.
    # Compute XYZ
    lb= bovy_coords.radec_to_lb(tgas['ra'],tgas['dec'],degree=True,epoch=None)
    XYZ= bovy_coords.lbd_to_XYZ(lb[:,0],lb[:,1],1./tgas['parallax'],
                                degree=True)
    # Generate vradec and projection matrix
    vradec= numpy.array([bovy_coords._K/tgas['parallax']*tgas['pmra'],
                         bovy_coords._K/tgas['parallax']*tgas['pmdec']])
    proj= compute_projection(tgas)
    # Sample from the joint (parallax,proper motion) uncertainty distribution
    # to get the covariance matrix of the vradec, using MC sims
    nmc= 10001
    vradec_cov= compute_vradec_cov_mc(tgas,nmc)
    # Fit each zbin; resume at options.startz when restarting mid-population
    if spii == options.start:
        startz= options.startz
    else:
        startz= 0
    for ii in tqdm.trange(startz,len(zbins)-1):
        # Select stars in this z slice within 0.2 (kpc, presumably) of the
        # Sun in the plane -- TODO confirm units
        indx= (XYZ[:,2] > zbins[ii])\
            *(XYZ[:,2] <= zbins[ii+1])\
            *(numpy.sqrt(XYZ[:,0]**2.+XYZ[:,1]**2.) < 0.2)
        nstar= numpy.sum(indx)
        if numpy.sum(indx) < 30: continue
        # Basic XD fit
        # NOTE(review): ycovar is zero here; the MC covariance vradec_cov is
        # only used in the bootstrap below -- confirm this is intended
        ydata= vradec.T[indx]
        ycovar= numpy.zeros_like(vradec.T)[indx]
        # Random initialization of the options.ngauss components
        initamp= numpy.random.uniform(size=options.ngauss)
        initamp/= numpy.sum(initamp)
        m= numpy.zeros(3)
        s= numpy.array([40.,40.,20.])
        initmean= []
        initcovar= []
        for jj in range(options.ngauss):
            initmean.append(m+numpy.random.normal(size=3)*s)
            initcovar.append(4.*s**2.*numpy.diag(numpy.ones(3)))
        initcovar= numpy.array(initcovar)
        initmean= numpy.array(initmean)
        lnL= extreme_deconvolution(ydata,ycovar,initamp,initmean,initcovar,
                                   projection=proj[indx])
        # Collapse the mixture into a single sigma_z^2 and kurtosis
        sig2z= combined_sig2(initamp,initmean[:,2],initcovar[:,2,2],
                             maxcovar=maxcovar)
        kurtz= combined_k(initamp,initmean[:,2],initcovar[:,2,2],
                          maxcovar=maxcovar)
        # Bootstrap uncertainties (these resamples do use vradec_cov)
        sam= bootstrap(options.nboot,
                       vradec.T[indx],vradec_cov[indx],proj[indx],
                       ngauss=options.ngauss,maxcovar=maxcovar)
        # Robust, MAD-based error estimates (1.4826 x MAD ~ Gaussian sigma)
        sig2z_err= 1.4826*numpy.median(numpy.fabs(sam[0]-numpy.median(sam[0])))
        kurtz_err= 1.4826*numpy.median(numpy.fabs(sam[1]-numpy.median(sam[1])))
        sig2kurtz_corr= numpy.corrcoef(sam)[0,1]
        csvwriter.writerow([spii,ii,nstar,
                            sig2z,sig2z_err,kurtz,kurtz_err,sig2kurtz_corr])
        # Flush after every row so progress survives a crash/restart
        csvout.flush()
    return None
def test_single_gauss_1d_varunc_log():
    # Same as in test_oned, but now also log; afterwards check that the
    # requested logfiles were produced and are non-empty.
    ndata = 3001
    ydata = numpy.atleast_2d(numpy.random.normal(size=ndata)).T
    # Heteroskedastic noise with variance declared in ycovar
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit: single component, deliberately offset start
    K = 1
    initamp = numpy.ones(K)
    initmean = numpy.atleast_2d(numpy.mean(ydata) + numpy.std(ydata))
    initcovar = numpy.atleast_3d(3. * numpy.var(ydata))
    # Run XD with logging enabled
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          logfile=logfile)
    # First test that fit worked
    tol = 10. / numpy.sqrt(ndata)
    assert numpy.fabs(initmean - 0.) < tol, \
        'XD does not recover correct mean for single Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar - 1.) < tol, \
        'XD does not recover correct variance for single Gaussian w/ uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile + '_c.log'), \
        'XD did not produce _c.log logfile when asked'
    num_lines = sum(1 for line in open(logfile + '_c.log'))
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile + '_loglike.log'), \
        'XD did not produce _loglike.log logfile when asked'
    num_lines = sum(1 for line in open(logfile + '_loglike.log'))
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    # Clean up the logfiles
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def test_single_gauss_1d_nounc():
    """Recover the mean and variance of a single noiseless 1D Gaussian."""
    n_samples = 3001
    y = numpy.random.normal(size=n_samples).reshape(n_samples, 1)
    y_cov = numpy.zeros_like(y)
    # Single-component start, offset from the truth on purpose
    amp0 = numpy.ones(1)
    mean0 = numpy.atleast_2d(numpy.mean(y) + 1.)
    covar0 = numpy.atleast_3d(3. * numpy.var(y))
    # Fit; XD writes the solution back into amp0/mean0/covar0
    extreme_deconvolution(y, y_cov, amp0, mean0, covar0)
    threshold = 10. / numpy.sqrt(n_samples)
    assert numpy.fabs(mean0 - 0.) < threshold, \
        'XD does not recover correct mean for single Gaussian w/o uncertainties'
    assert numpy.fabs(covar0 - 1.) < threshold, \
        'XD does not recover correct variance for single Gaussian w/o uncertainties'
    return None
def test_ngerrors():
    # Exercise XD's non-Gaussian per-point error model (ng=True): the noise
    # on each point is itself a mixture of Gaussians, or a set of samples
    # when samples=True. NOTE: Python 2 script (print statements).
    samples= False
    if samples:
        ngauss= 10
    else:
        ngauss= 2
    #Generate data
    ndata= 10001
    ydata= numpy.random.normal(size=(ndata,1))
    #Add noise: bimodal offsets of +5 or -3 with equal probability
    for ii in range(ndata):
        if not samples:
            if numpy.random.uniform() < 0.5:
                ydata[ii,0]+= numpy.random.normal()+5.
            else:
                ydata[ii,0]+= numpy.random.normal()-3.
    #bovy_plot.bovy_print()
    #bovy_plot.bovy_hist(ydata,bins=101,histtype='step',color='k')
    #bovy_plot.bovy_end_print('/Users/bovy/Desktop/test.png')
    ycovar= numpy.ones((ndata,2))*0.
    # Per-point error mixture: equal weights
    ngamp= numpy.ones((ndata,ngauss,1))/ngauss
    ngmean= numpy.zeros((ndata,ngauss,1))
    if samples:
        # Represent each point's error distribution by noisy samples
        for ii in range(ndata):
            for jj in range(ngauss):
                if numpy.random.uniform() < 0.5:
                    ngmean[ii,jj,0]= ydata[ii,0]+(numpy.random.normal()+5.)
                else:
                    ngmean[ii,jj,0]= ydata[ii,0]+(numpy.random.normal()-3.)
            ydata[ii,0]= 0.
        ngcovar= numpy.zeros((ndata,ngauss,1))
    else:
        # Analytic error mixture matching the noise added above
        ngmean[:,0,0]= 5.
        ngmean[:,1,0]= -3.
        ngcovar= numpy.ones((ndata,ngauss,1))
    # One-component starting guess (2D alternative kept below as dead code)
    xamp= numpy.ones(1)/1.
    xmean= numpy.array([[0.]])
    xcovar= numpy.array([[[0.03821028]]])
    """
    xamp= numpy.ones(2)/2.
    xmean= numpy.array([[0.,0.],[1.,-1.]])
    xcovar= numpy.array([[[ 0.03821028, 0.02108113],
                          [ 0.02014796, 0.03173839]],
                         [[ 0.06219194, 0.02302473],
                          [ 0.02738021, 0.06778009]]])
    """
    # Run XD with the non-Gaussian error model; returns the log-likelihood
    l= extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar,
                             ng=True,
                             ngamp=ngamp,
                             ngmean=ngmean,
                             ngcovar=ngcovar)
    print l
    print xamp, xmean, xcovar
def test_triple_gauss_1d_varunc_snm():
    # Generate data from two Gaussians, recover mean and variance
    # Also run split-and-merge
    ndata= 3001
    # Truth: amp 0.3 at N(-4,1), amp 0.1 at N(1,4), amp 0.6 at N(8,1.5^2)
    amp_true= [0.3,0.1,0.6]
    assign= numpy.random.choice(numpy.arange(3),p=amp_true,size=ndata)
    ydata= numpy.zeros((ndata,1))
    ydata[assign==0,0]= numpy.random.normal(size=numpy.sum(assign==0))-4.
    ydata[assign==1,0]= numpy.random.normal(size=numpy.sum(assign==1))*2.+1.
    ydata[assign==2,0]= numpy.random.normal(size=numpy.sum(assign==2))*1.5+8.
    # Heteroskedastic measurement noise
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit: equal amplitudes, deliberately clustered means
    K= 3
    initamp= numpy.ones(K)/float(K)
    initmean= numpy.array([[-1.],[0.],[1.]])
    initcovar= numpy.zeros((K,1,1))
    for kk in range(K):
        initcovar[kk]= numpy.mean(3.*numpy.var(ydata))
    # Run XD with the maximal split-and-merge scheme
    extreme_deconvolution(ydata,ycovar,initamp,initmean,initcovar,
                          maxsnm=True)
    # Test: identify the components by their fitted amplitudes
    tol= 25./numpy.sqrt(ndata)
    first= initamp > 0.5
    assert numpy.fabs(initamp[first]-amp_true[2]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[first]-8.) < tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[first]-1.5**2.) < tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    second= (initamp <= 0.5)*(initamp > 0.2)
    assert numpy.fabs(initamp[second]-amp_true[0]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[second]--4.) < 2.*tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[second]-1.) < 2.*tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    third= (initamp <= 0.2)
    assert numpy.fabs(initamp[third]-amp_true[1]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[third]-1.) < 4.*tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[third]-4.) < 6.*tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    return None
def xd(data, init_xdtarget):
    """Run extreme deconvolution on `data`, starting from `init_xdtarget`,
    and return the fitted mixture as a new xdtarget."""
    amp = init_xdtarget.amp
    mean = init_xdtarget.mean
    covar = init_xdtarget.covar
    # Optional per-point weights; getattr defaults mirror the original
    # hasattr/else branches
    weight = getattr(data, 'weight', None)
    logweight = getattr(data, 'logweight', False)
    # XD refines amp/mean/covar in place
    extreme_deconvolution(data.a, data.acov, amp, mean, covar,
                          weight=weight, logweight=logweight)
    return xdtarget(amp=amp, mean=mean, covar=covar)
def fit(self, X, Xerr):
    """Fit the noisy-data GMM: warm-start from a plain GMM fit on X, then
    refine with extreme deconvolution using the per-point errors Xerr."""
    from extreme_deconvolution import extreme_deconvolution
    X, Xerr = self._X_check(X, Xerr)
    self._X, self._Xerr = X, Xerr
    # Error-free GMM fit supplies the starting point; copy so XD can
    # mutate the arrays in place without touching the GMM object
    warm = GMM(self.n_components, n_iter=10, covariance_type='full').fit(X)
    weights = warm.weights_.copy()
    means = warm.means_.copy()
    covars = warm.covars_.copy()
    # XD refines weights/means/covars in place; returns the log-likelihood
    self.l = extreme_deconvolution(X, Xerr, weights, means, covars)
    self.weights_ = weights
    self.means_ = means
    self.covariances_ = covars
    return None
def deconvolveAbundances(options,args): if options.xdfile is None: print "'xdfile' option needs to be set ..." print "Returning ..." return if os.path.exists(options.xdfile): return #Nothing to do #Load data raw= readRealData(options,args) #Deconvolve using XD, setup data ydata= numpy.zeros((len(raw),2)) ycovar= numpy.zeros((len(raw),2)) ydata[:,0]= raw.feh ydata[:,1]= raw.afe ycovar[:,0]= options.dfeh**2. ycovar[:,0]= options.dafe**2. #setup initial cond xamp= numpy.ones(2)/2. xmean= numpy.zeros((2,2)) xmean[0,0]= 0. #Solar abundances xmean[0,1]= 0. xmean[1,]= -0.6 #"thick" abundances xmean[1,1]= 0.35 xcovar= numpy.zeros((2,2,2)) xcovar[0,0,0]= 0.2**2. xcovar[1,0,0]= 0.2**2. xcovar[0,1,1]= 0.1**2. xcovar[1,1,1]= 0.1**2. #Run XD extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar) #Save outfile= open(options.xdfile,'wb') pickle.dump(xamp,outfile) pickle.dump(xmean,outfile) pickle.dump(xcovar,outfile) outfile.close()
def bootstrap(nboot, vrd, vrd_cov, proj, ngauss=2, maxcovar=30.):
    """Bootstrap the combined vertical dispersion and kurtosis.

    Resamples (vrd, vrd_cov, proj) with replacement nboot times, refits the
    ngauss-component XD mixture each time, and returns a (2, nboot) array
    with sigma_z^2 estimates in row 0 and kurtosis estimates in row 1.
    """
    results = numpy.empty((2, nboot))
    ndata = len(vrd)
    for boot_idx in range(nboot):
        # Draw an index sample with replacement
        sample = numpy.floor(
            numpy.random.uniform(size=ndata) * ndata).astype('int')
        # Random mixture initialization (normalized amplitudes)
        amp = numpy.random.uniform(size=ngauss)
        amp /= numpy.sum(amp)
        center = numpy.zeros(3)
        scale = numpy.array([40., 40., 20.])
        mean = numpy.array([center + numpy.random.normal(size=3) * scale
                            for _ in range(ngauss)])
        covar = numpy.array([4. * scale**2. * numpy.diag(numpy.ones(3))
                             for _ in range(ngauss)])
        # XD refines amp/mean/covar in place
        extreme_deconvolution(vrd[sample], vrd_cov[sample], amp, mean, covar,
                              projection=proj[sample])
        results[0, boot_idx] = combined_sig2(amp, mean[:, 2], covar[:, 2, 2],
                                             maxcovar=maxcovar)
        results[1, boot_idx] = combined_k(amp, mean[:, 2], covar[:, 2, 2],
                                          maxcovar=maxcovar)
    return results
# Top-level duplicate of the fitting section of TryModel: GMM warm start,
# XD refinement, and XDGMM classifier construction for the star and galaxy
# training sets.
# NOTE(review): Python 2 script code relying on names defined earlier
# (GMM, XTrainStar, XErrTrainStar, xd, XDGMM, nGaussiansStar, ...).
GMMStar = GMM(nGaussiansStar, n_iter = 10, covariance_type='full').fit(XTrainStar)
GMMGalaxy = GMM(nGaussiansGalaxy, n_iter=10, covariance_type='full').fit(XTrainGalaxy)
ampstar = GMMStar.weights_
meanstar = GMMStar.means_
covarstar = GMMStar.covars_
ampgalaxy = GMMGalaxy.weights_
meangalaxy = GMMGalaxy.means_
covargalaxy = GMMGalaxy.covars_
# Results are saved in `amp`, `mean`, and `covar` (XD mutates in place)
print 'Deconvolving star'
xd.extreme_deconvolution(XTrainStar, XErrTrainStar, ampstar, meanstar, covarstar)
clfstar = XDGMM(nGaussiansStar)
clfstar.alpha = ampstar
clfstar.mu = meanstar
clfstar.V = covarstar
print 'Deconvolving galaxies'
xd.extreme_deconvolution(XTrainGalaxy, XErrTrainGalaxy, ampgalaxy, meangalaxy, covargalaxy)
clfgalaxy = XDGMM(nGaussiansGalaxy)
clfgalaxy.alpha = ampgalaxy
clfgalaxy.mu = meangalaxy
clfgalaxy.V = covargalaxy
def plot_mapflarepdf(savename, plotname):
    # Plot the PDFs of the inverse flaring scale R_flare^-1 for a set of
    # MAPs: per-MAP histograms with 2-Gaussian XD fits overlaid, plus the
    # normalized product of all fitted PDFs. XD fits are cached in savename.
    # NOTE: contains a Python 2 print statement ('print ii').
    # Load the samples
    with open('../mapfits/tribrokenexpflare.sav', 'rb') as savefile:
        bf = numpy.array(pickle.load(savefile))
        samples = numpy.array(pickle.load(savefile))
    maps = define_rcsample.MAPs()
    # Loop through the low-alpha MAPs and compute the XD decomposition
    if 'lowalpha' in savename:
        plotmaps = [9, 16, 23, 29, 36, 43, 50, 57, 64, 71]
    else:
        plotmaps = [19, 26, 32, 39, 45]
    if not os.path.exists(savename):
        # Fit each selected MAP's R_flare^-1 samples (column 4) with a
        # two-Gaussian XD mixture and cache the results
        ngauss = 2
        allxamp = numpy.empty((len(plotmaps), ngauss))
        allxmean = numpy.empty((len(plotmaps), ngauss, 1))
        allxcovar = numpy.empty((len(plotmaps), ngauss, 1, 1))
        cnt = 0
        for ii, map in enumerate(maps.map()):
            if not ii in plotmaps: continue
            print ii
            # Fit PDFs with XD, randomly perturbed starting means
            xamp = numpy.array([0.45, 0.5])
            xmean = numpy.array([
                numpy.mean(samples[ii, 4]) +
                numpy.random.normal() * numpy.std(samples[ii, 4]),
                numpy.mean(samples[ii, 4]) +
                numpy.random.normal() * numpy.std(samples[ii, 4])
            ])[:, numpy.newaxis]
            xcovar = numpy.reshape(
                numpy.tile(numpy.var(samples[ii, 4]), (2, 1)), (2, 1, 1))
            XD.extreme_deconvolution(samples[ii, 4][:, numpy.newaxis],
                                     numpy.zeros((len(samples[ii, 4]), 1)),
                                     xamp, xmean, xcovar)
            allxamp[cnt] = xamp
            allxmean[cnt] = xmean
            allxcovar[cnt] = xcovar
            cnt += 1
        save_pickles(savename, allxamp, allxmean, allxcovar)
    else:
        # Cached fits exist: load them instead of refitting
        with open(savename, 'rb') as savefile:
            allxamp = pickle.load(savefile)
            allxmean = pickle.load(savefile)
            allxcovar = pickle.load(savefile)
    # Now plot
    cmap = cm.coolwarm
    xrange = [-0.37, 0.25]
    if 'lowalpha' in savename:
        # xrange= [-0.4,0.2]
        yrange = [0., 30.]
        combDiv = 2.
        colorFunc = lambda x: cmap((x + 0.6) * 0.95 / 0.9 + 0.05)
    else:
        # xrange= [-0.3,0.3]
        yrange = [0., 13.5]
        colorFunc = lambda x: cmap((x + 0.5) * 0.95 / 0.5 + 0.05)
        combDiv = 1.5
    overplot = False
    plotXDFit = True
    cnt = 0
    bovy_plot.bovy_print(axes_labelsize=18, text_fontsize=18,
                         xtick_labelsize=14, ytick_labelsize=14)
    for ii, map in enumerate(maps.map()):
        if not ii in plotmaps: continue
        # Color by median metallicity, rounded to 0.05 dex
        tfeh = round(numpy.median(map['FE_H']) * 20.) / 20.
        if tfeh == 0.25: tfeh = 0.3
        if tfeh == -0.1: tfeh = -0.1  # NOTE(review): no-op -- confirm intent
        bovy_plot.bovy_hist(
            samples[ii, 4],
            range=xrange, bins=51, overplot=overplot,
            yrange=yrange, histtype='step', normed=True, zorder=2,
            color=colorFunc(tfeh),
            xlabel=r'$R_{\mathrm{flare}}^{-1}\,(\mathrm{kpc}^{-1})$')
        if plotXDFit:
            # Overlay the two-Gaussian XD fit for this MAP
            txs = numpy.linspace(xrange[0], xrange[1], 1001)
            pyplot.plot(
                txs, 1. / numpy.sqrt(2. * numpy.pi) *
                (allxamp[cnt, 0] / numpy.sqrt(allxcovar[cnt, 0, 0, 0]) *
                 numpy.exp(-0.5 * (txs - allxmean[cnt, 0, 0])**2. /
                           allxcovar[cnt, 0, 0, 0]) +
                 allxamp[cnt, 1] / numpy.sqrt(allxcovar[cnt, 1, 0, 0]) *
                 numpy.exp(-0.5 * (txs - allxmean[cnt, 1, 0])**2. /
                           allxcovar[cnt, 1, 0, 0])),
                color=colorFunc(tfeh), zorder=1)
        overplot = True
        cnt += 1
    # Normalized product of the individual fitted PDFs
    txs = numpy.linspace(xrange[0], xrange[1], 1001)
    comb = numpy.ones_like(txs)
    for ii in range(len(plotmaps)):
        comb *= 1. / numpy.sqrt(2. * numpy.pi) * (
            allxamp[ii, 0] / numpy.sqrt(allxcovar[ii, 0, 0, 0]) *
            numpy.exp(-0.5 * (txs - allxmean[ii, 0, 0])**2. /
                      allxcovar[ii, 0, 0, 0]) +
            allxamp[ii, 1] / numpy.sqrt(allxcovar[ii, 1, 0, 0]) *
            numpy.exp(-0.5 * (txs - allxmean[ii, 1, 0])**2. /
                      allxcovar[ii, 1, 0, 0]))
    comb /= numpy.sum(comb) * (txs[1] - txs[0])
    pyplot.plot(txs, comb / combDiv, 'k-', lw=2., zorder=20)
    pyplot.plot([0., 0.], [0., 50.], 'k--', lw=1.5, zorder=0)
    # Annotate with the mean +- std of the combined PDF
    t = pyplot.text(
        xrange[0] + 0.25 * (xrange[1] - xrange[0]) +
        0.03 * ('highalpha' in savename), 0.8 * yrange[1],
        r'$R_{\mathrm{flare}}^{-1} = %.2f \pm %.2f\,\mathrm{kpc}^{-1}$' %
        (numpy.sum(comb * txs) / numpy.sum(comb),
         numpy.sqrt(
             numpy.sum(comb * txs**2.) / numpy.sum(comb) -
             (numpy.sum(comb * txs) / numpy.sum(comb))**2.)),
        size=18.)
    t.set_bbox(dict(color='w', edgecolor='none'))
    if 'lowalpha' in savename:
        bovy_plot.bovy_text(
            r'$\mathrm{low-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
            top_left=True, size=16.)
    else:
        bovy_plot.bovy_text(
            r'$\mathrm{high-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
            top_left=True, size=16.)
    bovy_plot.bovy_end_print(plotname)
    return None
def test_triple_gauss_1d_varunc_snm_log():
    """Like the split-and-merge triple-Gaussian test, but also write XD
    logfiles and check that they are produced and non-empty.

    True mixture: amp 0.1 at N(-4,1), amp 0.3 at N(1,4), amp 0.6 at N(8,1.5^2).
    """
    ndata = 3001
    amp_true = [0.1, 0.3, 0.6]
    assign = numpy.random.choice(numpy.arange(3), p=amp_true, size=ndata)
    ydata = numpy.zeros((ndata, 1))
    ydata[assign == 0, 0] = numpy.random.normal(size=numpy.sum(assign == 0)) - 4.
    ydata[assign == 1, 0] = numpy.random.normal(size=numpy.sum(assign == 1)) * 2. + 1.
    ydata[assign == 2, 0] = numpy.random.normal(size=numpy.sum(assign == 2)) * 1.5 + 8.
    # Heteroskedastic measurement noise
    ycovar= numpy.ones_like(ydata)*\
        numpy.atleast_2d(numpy.random.uniform(size=ndata)).T
    ydata+= numpy.atleast_2d(numpy.random.normal(size=ndata)).T\
        *numpy.sqrt(ycovar)
    # initialize fit: equal amplitudes, deliberately clustered means
    K = 3
    initamp = numpy.ones(K) / float(K)
    initmean = numpy.array([[-1.], [0.], [1.]])
    initcovar = numpy.zeros((K, 1, 1))
    for kk in range(K):
        initcovar[kk] = numpy.mean(3. * numpy.var(ydata))
    # Run XD with maximal split-and-merge and logging enabled
    logfile = 'test_log'
    extreme_deconvolution(ydata, ycovar, initamp, initmean, initcovar,
                          maxsnm=True, logfile=logfile)
    # Test: identify the components by their fitted amplitudes
    tol = 25. / numpy.sqrt(ndata)
    first = initamp > 0.5
    assert numpy.fabs(initamp[first] - amp_true[2]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[first] - 8.) < tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[first] - 1.5**2.) < tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    second = (initamp <= 0.5) * (initamp > 0.2)
    # BUG FIX: this component (mean 1, var 4) has true amplitude amp_true[1]
    # = 0.3, not amp_true[0]; the swap was masked by the generous tolerance
    assert numpy.fabs(initamp[second] - amp_true[1]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[second] - 1.) < 4. * tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[second] - 4.) < 4. * tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    third = (initamp <= 0.2)
    # BUG FIX: likewise, the mean -4 component's true amplitude is amp_true[0]
    assert numpy.fabs(initamp[third] - amp_true[0]) < tol, \
        'XD does not recover correct amp for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initmean[third] - -4.) < 2. * tol, \
        'XD does not recover correct mean for triple Gaussian w/ uncertainties'
    assert numpy.fabs(initcovar[third] - 1.) < 2. * tol, \
        'XD does not recover correct variance for triple Gaussian w/ uncertainties'
    # Now test that the logfiles exist
    assert os.path.exists(logfile + '_c.log'), \
        'XD did not produce _c.log logfile when asked'
    num_lines = sum(1 for line in open(logfile + '_c.log'))
    assert num_lines > 0, "XD logfile _c.log appears to be empty, but shouldn't be"
    assert os.path.exists(logfile + '_loglike.log'), \
        'XD did not produce _loglike.log logfile when asked'
    num_lines = sum(1 for line in open(logfile + '_loglike.log'))
    assert num_lines > 0, "XD logfile _loglike.log appears to be empty, but shouldn't be"
    os.remove(logfile + '_c.log')
    os.remove(logfile + '_loglike.log')
    return None
def plot_mapflarepdf(savename,plotname):
    # Duplicate (compact-style) version of plot_mapflarepdf above: plot the
    # R_flare^-1 PDFs per MAP with cached 2-Gaussian XD fits overlaid, plus
    # the normalized product PDF. NOTE: Python 2 ('print ii').
    # Load the samples
    with open('../mapfits/tribrokenexpflare.sav','rb') as savefile:
        bf= numpy.array(pickle.load(savefile))
        samples= numpy.array(pickle.load(savefile))
    maps= define_rcsample.MAPs()
    # Loop through the low-alpha MAPs and compute the XD decomposition
    if 'lowalpha' in savename:
        plotmaps= [9,16,23,29,36,43,50,57,64,71]
    else:
        plotmaps= [19,26,32,39,45]
    if not os.path.exists(savename):
        # Fit each selected MAP's R_flare^-1 samples (column 4) with XD
        ngauss= 2
        allxamp= numpy.empty((len(plotmaps),ngauss))
        allxmean= numpy.empty((len(plotmaps),ngauss,1))
        allxcovar= numpy.empty((len(plotmaps),ngauss,1,1))
        cnt= 0
        for ii, map in enumerate(maps.map()):
            if not ii in plotmaps: continue
            print ii
            # Fit PDFs with XD, randomly perturbed starting means
            xamp= numpy.array([0.45,0.5])
            xmean= numpy.array([numpy.mean(samples[ii,4])
                                +numpy.random.normal()*numpy.std(samples[ii,4]),
                                numpy.mean(samples[ii,4])
                                +numpy.random.normal()*numpy.std(samples[ii,4])])[:,numpy.newaxis]
            xcovar= numpy.reshape(numpy.tile(numpy.var(samples[ii,4]),(2,1)),
                                  (2,1,1))
            XD.extreme_deconvolution(samples[ii,4][:,numpy.newaxis],
                                     numpy.zeros((len(samples[ii,4]),1)),
                                     xamp,xmean,xcovar)
            allxamp[cnt]= xamp
            allxmean[cnt]= xmean
            allxcovar[cnt]= xcovar
            cnt+= 1
        save_pickles(savename,allxamp,allxmean,allxcovar)
    else:
        # Cached fits exist: load them instead of refitting
        with open(savename,'rb') as savefile:
            allxamp= pickle.load(savefile)
            allxmean= pickle.load(savefile)
            allxcovar= pickle.load(savefile)
    # Now plot
    cmap= cm.coolwarm
    xrange= [-0.37,0.25]
    if 'lowalpha' in savename:
        # xrange= [-0.4,0.2]
        yrange= [0.,30.]
        combDiv= 2.
        colorFunc= lambda x: cmap((x+0.6)*0.95/0.9+0.05)
    else:
        # xrange= [-0.3,0.3]
        yrange= [0.,13.5]
        colorFunc= lambda x: cmap((x+0.5)*0.95/0.5+0.05)
        combDiv= 1.5
    overplot= False
    plotXDFit= True
    cnt= 0
    bovy_plot.bovy_print(axes_labelsize=18,text_fontsize=18,
                         xtick_labelsize=14,ytick_labelsize=14)
    for ii, map in enumerate(maps.map()):
        if not ii in plotmaps: continue
        # Color by median metallicity, rounded to 0.05 dex
        tfeh= round(numpy.median(map['FE_H'])*20.)/20.
        if tfeh == 0.25: tfeh= 0.3
        if tfeh == -0.1: tfeh= -0.1  # NOTE(review): no-op -- confirm intent
        bovy_plot.bovy_hist(samples[ii,4],
                            range=xrange,bins=51,overplot=overplot,
                            yrange=yrange,
                            histtype='step',normed=True,zorder=2,
                            color=colorFunc(tfeh),
                            xlabel=r'$R_{\mathrm{flare}}^{-1}\,(\mathrm{kpc}^{-1})$')
        if plotXDFit:
            # Overlay the two-Gaussian XD fit for this MAP
            txs= numpy.linspace(xrange[0],xrange[1],1001)
            pyplot.plot(txs,1./numpy.sqrt(2.*numpy.pi)*(allxamp[cnt,0]/numpy.sqrt(allxcovar[cnt,0,0,0])*numpy.exp(-0.5*(txs-allxmean[cnt,0,0])**2./allxcovar[cnt,0,0,0])
                                                        +allxamp[cnt,1]/numpy.sqrt(allxcovar[cnt,1,0,0])*numpy.exp(-0.5*(txs-allxmean[cnt,1,0])**2./allxcovar[cnt,1,0,0])),
                        color=colorFunc(tfeh),
                        zorder=1)
        overplot=True
        cnt+= 1
    # Normalized product of the individual fitted PDFs
    txs= numpy.linspace(xrange[0],xrange[1],1001)
    comb= numpy.ones_like(txs)
    for ii in range(len(plotmaps)):
        comb*= 1./numpy.sqrt(2.*numpy.pi)*(allxamp[ii,0]/numpy.sqrt(allxcovar[ii,0,0,0])*numpy.exp(-0.5*(txs-allxmean[ii,0,0])**2./allxcovar[ii,0,0,0])
                                           +allxamp[ii,1]/numpy.sqrt(allxcovar[ii,1,0,0])*numpy.exp(-0.5*(txs-allxmean[ii,1,0])**2./allxcovar[ii,1,0,0]))
    comb/= numpy.sum(comb)*(txs[1]-txs[0])
    pyplot.plot(txs,comb/combDiv,'k-',lw=2.,zorder=20)
    pyplot.plot([0.,0.],[0.,50.],'k--',lw=1.5,zorder=0)
    # Annotate with the mean +- std of the combined PDF
    t= pyplot.text(xrange[0]+0.25*(xrange[1]-xrange[0])+0.03*('highalpha' in savename),
                   0.8*yrange[1],
                   r'$R_{\mathrm{flare}}^{-1} = %.2f \pm %.2f\,\mathrm{kpc}^{-1}$' % (numpy.sum(comb*txs)/numpy.sum(comb),
                                                                                      numpy.sqrt(numpy.sum(comb*txs**2.)/numpy.sum(comb)-(numpy.sum(comb*txs)/numpy.sum(comb))**2.)),
                   size=18.)
    t.set_bbox(dict(color='w',edgecolor='none'))
    if 'lowalpha' in savename:
        bovy_plot.bovy_text(r'$\mathrm{low-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
                            top_left=True,
                            size=16.)
    else:
        bovy_plot.bovy_text(r'$\mathrm{high-}[\alpha/\mathrm{Fe}]\ \mathrm{MAPs}$',
                            top_left=True,
                            size=16.)
    bovy_plot.bovy_end_print(plotname)
    return None
def XDPotPDFs(options, args):
    """Fit extreme-deconvolution (XD) models to three 2D posterior PDFs.

    Restores potential-fit MCMC chains from args[0] (and optionally derived
    quantities from options.derivedfile), then fits options.g-component XD
    mixtures to three 2D marginal PDFs:
      1) log(rd) vs. logit(vcdvc)
      2) log(rhodm) vs. logit(plhalo/3)
      3) log(vc) vs. dlnvcdlnr
    and pickles the three fitted mixtures to options.plotfile.

    Parameters
    ----------
    options : optparse-style options object; uses .derivedfile, .g, .plotfile
    args : sequence; args[0] is the pickle file with the chains

    Raises
    ------
    IOError
        If --derivedfile is given but the file does not exist.
    """
    # First load the chains
    savefile = open(args[0], 'rb')
    thesesamples = pickle.load(savefile)
    savefile.close()
    if not options.derivedfile is None:
        if os.path.exists(options.derivedfile):
            derivedfile = open(options.derivedfile, 'rb')
            derivedsamples = pickle.load(derivedfile)
            derivedfile.close()
        else:
            raise IOError("--derivedfile given but does not exist ...")
    samples = {}
    scaleDict = {}
    # Parameter names and their scalings to physical units
    paramnames = ['rd', 'vc', 'zh', 'fh', 'dlnvcdlnr', 'usun', 'vsun']
    scale = [_REFR0, _REFV0, 1000.*_REFR0, 1., 1./30.*_REFV0/_REFR0, _REFV0, _REFV0]
    if len(thesesamples[0]) == 5:
        # 5-parameter chains lack usun/vsun: drop the last two entries
        paramnames.pop()
        paramnames.pop()
        scale.pop()
        scale.pop()
    if not options.derivedfile is None:
        paramnames.extend(['surfz', 'surfzdisk', 'rhodm',
                           'rhoo', 'massdisk', 'plhalo', 'vcdvc'])
        scale.extend([1., 1., 1., 1., 1., 1., 1.])
    for kk in range(len(thesesamples[0])):
        xs = numpy.array([s[kk] for s in thesesamples])
        # rd and zh are sampled in log; convert to linear
        if paramnames[kk] == 'rd' or paramnames[kk] == 'zh':
            xs = numpy.exp(xs)
        samples[paramnames[kk]] = xs
        scaleDict[paramnames[kk]] = scale[kk]
    if not options.derivedfile is None:
        # Only the first 7 derived quantities are used
        for ll in range(len(thesesamples[0]),
                        len(thesesamples[0])+7):  # len(derivedsamples[0])):
            kk = ll-len(thesesamples[0])
            xs = numpy.array([s[kk] for s in derivedsamples])
            samples[paramnames[ll]] = xs
            scaleDict[paramnames[ll]] = scale[ll]
    # Now fit XD to the three 2D PDFs
    # In each fit: zero observational covariance (fit the sample scatter),
    # equal initial amplitudes, means jittered around the sample mean, and
    # diagonal initial covariances set to the sample variances.  XD updates
    # the amp/mean/covar arrays in place.
    # 1) Vd/v vs. Rd
    ydata = numpy.zeros((len(samples['vcdvc']), 2))
    ycovar = numpy.zeros((len(samples['vcdvc']), 2))
    ydata[:, 0] = numpy.log(samples['rd'])
    ydata[:, 1] = special.logit(samples['vcdvc'])
    vcdxamp = numpy.ones(options.g)/options.g
    vcdxmean = numpy.zeros((options.g, 2))
    vcdxcovar = numpy.zeros((options.g, 2, 2))
    for ii in range(options.g):
        vcdxmean[ii, :] = numpy.mean(ydata, axis=0)+numpy.std(ydata, axis=0)*numpy.random.normal(size=(2))/4.
        vcdxcovar[ii, 0, 0] = numpy.var(ydata[:, 0])
        vcdxcovar[ii, 1, 1] = numpy.var(ydata[:, 1])
    extreme_deconvolution.extreme_deconvolution(ydata, ycovar,
                                                vcdxamp, vcdxmean, vcdxcovar)
    # 2) alpha_dm vs. rho_dm
    ydata = numpy.zeros((len(samples['rhodm']), 2))
    ycovar = numpy.zeros((len(samples['rhodm']), 2))
    ydata[:, 0] = numpy.log(samples['rhodm'])
    # plhalo/3 maps the halo power-law index into (0,1) for the logit
    ydata[:, 1] = special.logit(samples['plhalo']/3.)
    rhodmxamp = numpy.ones(options.g)/options.g
    rhodmxmean = numpy.zeros((options.g, 2))
    rhodmxcovar = numpy.zeros((options.g, 2, 2))
    for ii in range(options.g):
        rhodmxmean[ii, :] = numpy.mean(ydata, axis=0)+numpy.std(ydata, axis=0)*numpy.random.normal(size=(2))/4.
        rhodmxcovar[ii, 0, 0] = numpy.var(ydata[:, 0])
        rhodmxcovar[ii, 1, 1] = numpy.var(ydata[:, 1])
    extreme_deconvolution.extreme_deconvolution(ydata, ycovar,
                                                rhodmxamp, rhodmxmean, rhodmxcovar)
    # 3) dlnvcdlnr vs. vc
    ydata = numpy.zeros((len(samples['vc']), 2))
    ycovar = numpy.zeros((len(samples['vc']), 2))
    ydata[:, 0] = numpy.log(samples['vc'])
    ydata[:, 1] = samples['dlnvcdlnr']
    vcxamp = numpy.ones(options.g)/options.g
    vcxmean = numpy.zeros((options.g, 2))
    vcxcovar = numpy.zeros((options.g, 2, 2))
    for ii in range(options.g):
        vcxmean[ii, :] = numpy.mean(ydata, axis=0)+numpy.std(ydata, axis=0)*numpy.random.normal(size=(2))/4.
        vcxcovar[ii, 0, 0] = numpy.var(ydata[:, 0])
        vcxcovar[ii, 1, 1] = numpy.var(ydata[:, 1])
    extreme_deconvolution.extreme_deconvolution(ydata, ycovar,
                                                vcxamp, vcxmean, vcxcovar)
    # Persist all three fitted mixtures in one pickle
    save_pickles(options.plotfile, vcdxamp, vcdxmean, vcdxcovar,
                 rhodmxamp, rhodmxmean, rhodmxcovar,
                 vcxamp, vcxmean, vcxcovar)
    return None
# NOTE(review): fragment of a larger loop body (its enclosing definition is
# outside this excerpt).  It references externals defined elsewhere:
# ngauss, vals, ivel, irad, rr, ydata, ycovar, and the gauxd_* output arrays.
# Fits an ngauss-component XD mixture to one (velocity, radius) bin and
# stores the amplitude-sorted components.
initamp = np.random.uniform(size=ngauss)
initamp /= np.sum(initamp)  # amplitudes must sum to 1
# Robust location/scale: median and MAD-based sigma (1.4826 * MAD)
m = np.median(vals)
s = 1.4826 * np.median(np.fabs(vals - m))
# NOTE(review): Python 2 print statement mixed with print() calls below —
# confirm the intended Python version for this script.
print ' iv, ir initial guess of median, sig=', ivel, irad, m, s
initmean = []
initcovar = []
# All components start with the same variance s^2
for ii in range(ngauss):
    initcovar.append(s**2.)
initcovar = np.array([[initcovar]]).T  # shape (ngauss, 1, 1) for 1D XD
# Now let the means vary
for ii in range(ngauss):
    initmean.append(m + np.random.normal() * s)
initmean = np.array([initmean]).T  # shape (ngauss, 1)
# XD updates initamp/initmean/initcovar in place and returns the log-likelihood
print("iv, ir, lnL", ivel, irad, \
      extreme_deconvolution(ydata, ycovar, \
                            initamp, initmean, initcovar))
print("iv, ir, amp, mean, std. dev.", ivel, irad, \
      initamp, initmean[:, 0], \
      np.sqrt(initcovar[:, 0, 0]))
# store the amp and mean
# sort with amplitude (descending)
sortindx = np.argsort(initamp)
sortindx = sortindx[::-1]
# print ' sorted amp, mean = ', initamp[sortindx], \
#     initmean[sortindx,0]
gauxd_amp[irad, :] = initamp[sortindx]
gauxd_mean[irad, :] = initmean[sortindx, 0]
gauxd_std[irad, :] = np.sqrt(initcovar[sortindx, 0, 0])
gauxd_rr[irad] = rr  # for plot
# NOTE(review): fragment — this excerpt begins inside an `if` whose header
# (and the loop over i) lies before the visible text; the reconstructed
# indentation of the leading if/else is a best guess.  It initializes the
# 4x4 per-component covariances, snapshots the XD inputs, runs one
# fixed-mean deconvolution of up to `it` iterations, and records timing and
# log-likelihood.  Externals: i, sub, neu_sigma, xmean, xamp1, xcovar,
# ydata, ycovar, weights, it, times, log_like.
        xcovar[i][3][3] = sub[3]
    else:
        # Fall back to a quarter of the neuron sigmas on the diagonal
        xcovar[i][0][0] = neu_sigma[i, 0] / 4
        xcovar[i][1][1] = neu_sigma[i, 1] / 4
        xcovar[i][2][2] = neu_sigma[i, 2] / 4
        xcovar[i][3][3] = neu_sigma[i, 3] / 4
# Snapshot the initial mixture parameters for later comparison
with open('before_dec.txt', 'w') as filehandle:
    filehandle.write("start xmean \n" + str(xmean) + '\n')
    filehandle.write("start xamp \n" + str(xamp1) + '\n')
    filehandle.write("start xcovar: \n" + str(xcovar) + '\n')
t0 = time.time()
# fixmean=True: only amplitudes and covariances are updated in place;
# l is the returned log-likelihood
l = extreme_deconvolution(ydata, ycovar, xamp1, xmean, xcovar,
                          weight=weights, maxiter=it, fixmean=True)
t1 = time.time()
xdc_time = t1 - t0
with open('after_dec.txt', 'w') as filehandle:
    filehandle.write("new xmean \n" + str(xmean) + '\n')
    filehandle.write("new xamp \n" + str(xamp1) + '\n')
    filehandle.write("new xcovar: \n" + str(xcovar) + '\n')
print(str(it) + " iteration(s) done")
times[it] = xdc_time
log_like[it] = l
# NOTE(review): file handle opened without a matching close/with in this
# excerpt — confirm it is closed after the excerpt ends.
timesfile = open('iters_time', 'wb')
def xdGamma(parser):
    """Fit an XD mixture to per-object log(gamma) posteriors (Python 2).

    Reads pickled variability-fit samples from the first positional
    argument, summarizes each object's log(gamma) samples by a mean and
    variance (plus a per-object options.g-component XD fit of the residual
    distribution), then runs a global options.k-component XD fit using
    those per-object mixtures (ng=True) and pickles xamp/xmean/xcovar/ll
    to options.outfilename.

    Parameters
    ----------
    parser : optparse parser; options used: outfilename, seed, g, k, savefits
    """
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if options.outfilename is None:
        print "-o filename options needs to be set ..."
        print "Returning ..."
        return None
    # Refuse to overwrite an existing output file
    if os.path.exists(options.outfilename):
        print options.outfilename + " exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    numpy.random.seed(seed=options.seed)
    # Restore samples
    savefilename = args[0]
    print "Reading data ..."
    if os.path.exists(savefilename):
        savefile = open(savefilename, "rb")
        samples = pickle.load(savefile)
        # NOTE(review): `type` shadows the builtin for the rest of the function
        type = pickle.load(savefile)
        band = pickle.load(savefile)
        mean = pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    # Prepare samples for XD
    print "Preparing data ..."
    if type == "powerlawSF":
        if len(band) > 1:
            print "multi-band not implemented yet ..."
            print "Returning ..."
            return
        else:
            nparams = 1  # RITABAN 2 for gamma and A
    elif type == "DRW":
        print "DRW not implemented yet ..."
        print "Returning ..."
        return
    elif type == "KS11":
        nparams = 1
    elif type == "scatter":
        print "scatter not implemented yet ..."
        print "Returning ..."
        return
    ndata = len(samples)
    # Per-object summary statistics and per-object XD mixtures (for ng=True)
    ydata = numpy.zeros((ndata, nparams))
    ycovar = numpy.zeros((ndata, nparams, nparams))
    ngamp = numpy.zeros((ndata, options.g))
    ngmean = numpy.zeros((ndata, options.g, nparams))
    ngcovar = numpy.zeros((ndata, options.g, nparams, nparams))
    for ii, key in enumerate(samples.keys()):
        # Progress indicator on one terminal line
        sys.stdout.write("\r" + _ERASESTR + "\r")
        sys.stdout.flush()
        sys.stdout.write("\rWorking on preparing %i / %i\r" % (ii + 1, ndata))
        sys.stdout.flush()
        if type == "powerlawSF":
            # Stack as A,g,Ac,gc
            loggammas = []
            # logAs= [] RITABAN
            for sample in samples[key]:
                loggammas.append(numpy.log(sample["gamma"][0]))
                # logAs.append(numpy.log(sample['logA'][0])) RITABAN
            loggammas = numpy.array(loggammas)
            ydata[ii, 0] = numpy.mean(loggammas)
            ycovar[ii, 0, 0] = numpy.var(loggammas)
            # logAs= numpy.array(logAs) RITABAN
            # ydata[ii,1]= numpy.mean(logAs) RITABAN
            # ycovar[ii,1,1]= numpy.var(logAs) RITABAN
            # Fit with g Gaussians
            thisydata = numpy.reshape(
                loggammas - ydata[ii, :],
                (len(loggammas), nparams)  # subtract mean to fit the error distribution
            )
            # RITABAN : The previous line can be replaced by
            # thisydata= ydata # I think
            thisycovar = (
                numpy.zeros((len(loggammas), nparams))
                + numpy.var(loggammas) * 10.0 ** -4.0
            )  # regularize RITABAN you can probably leave this
            thisxamp = numpy.ones(options.g) / options.g
            thisxcovar = numpy.ones((options.g, nparams, nparams)) * numpy.var(loggammas)
            thisxmean = (
                numpy.ones((options.g, nparams)) * numpy.mean(loggammas)
                + numpy.std(loggammas) * numpy.random.normal(size=(options.g, nparams)) / 4.0
            )
            # RITABAN : previous two lines should be replaced by something like
            # starting at line 122 (xmean= numpy.zeros((options.k,nparams)))
            # print numpy.mean(loggammas), numpy.std(loggammas)
            # XD updates thisxamp/thisxmean/thisxcovar in place
            extreme_deconvolution(thisydata, thisycovar, thisxamp, thisxmean, thisxcovar)
            ngamp[ii, :] = thisxamp
            ngmean[ii, :, :] = thisxmean
            ngcovar[ii, :, :, :] = thisxcovar
            # NOTE(review): len(band) > 1 was already rejected above, so this
            # branch appears unreachable — confirm.
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
        elif type == "DRW":
            print "DRW not supported currently"
            print "Returning ..."
            return
        elif type == "KS11":
            print "type == 'KS11' not implemented yet ..."
            print "Returning ..."
            return
    sys.stdout.write("\r" + _ERASESTR + "\r")
    sys.stdout.flush()
    # Outlier rejection
    # if type == 'powerlawSF':
    #     indx= (ydata[:,0] > -7.21)
    #     ydata= ydata[indx,:]
    #     ycovar= ycovar[indx,:,:]
    # Initial parameters for XD
    print "Running XD ..."
    xamp = numpy.ones(options.k) / float(options.k)
    xmean = numpy.zeros((options.k, nparams))
    for kk in range(options.k):
        # Jitter initial means around the sample mean by sigma/4
        xmean[kk, :] = numpy.mean(ydata, axis=0) + numpy.random.normal() * numpy.std(ydata, axis=0) / 4.0
    xcovar = numpy.zeros((options.k, nparams, nparams))
    for kk in range(options.k):
        xcovar[kk, :, :] = numpy.cov(ydata.T) * 2.0
    # ng=True: use the per-object Gaussian mixtures as the error model
    ll = extreme_deconvolution(ydata, ycovar, xamp, xmean, xcovar, ng=True, ngamp=ngamp, ngmean=ngmean, ngcovar=ngcovar)
    if True:
        print xamp
        print xmean
        print xcovar
        print ll
    # Prepare for saving
    print "Preparing output for saving ..."
    # Save
    print "Saving ..."
    if os.path.exists(options.outfilename):
        print options.outfilename + " exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    if options.savefits:
        raise NotImplementedError("Fits saving not implemented yet")
        # NOTE(review): everything below this raise is unreachable dead code
        # (and references undefined names outparams/weights); kept as-is.
        import pyfits
        cols = []
        if type == "powerlawSF":
            colA = []
            colg = []
            for kk in range(options.k):
                colA.append(outparams[kk]["logA"])
                colg.append(outparams[kk]["gamma"])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            colw = numpy.log(numpy.array(weights))
            cols.append(pyfits.Column(name="logA", format="E", array=colA))
            cols.append(pyfits.Column(name="gamma", format="E", array=colg))
        elif type == "KS11":
            colA = []
            colg = []
            cols = []
            for kk in range(options.k):
                colA.append(outparams[kk]["logA"])
                colg.append(outparams[kk]["gamma"])
                colg.append(outparams[kk]["s"])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            cols = numpy.array(colg)
            cols.append(pyfits.Column(name="logA", format="E", array=colA))
            cols.append(pyfits.Column(name="gamma", format="E", array=colg))
            cols.append(pyfits.Column(name="s", format="E", array=cols))
        colw = numpy.log(numpy.array(weights))
        cols.append(pyfits.Column(name="logweight", format="E", array=colw))
        columns = pyfits.ColDefs(cols)
        tbhdu = pyfits.new_table(columns)
        tbhdu.writeto(options.outfilename)
    else:
        # Pickle the fitted mixture and its log-likelihood
        outfile = open(options.outfilename, "wb")
        pickle.dump(xamp, outfile)
        pickle.dump(xmean, outfile)
        pickle.dump(xcovar, outfile)
        pickle.dump(ll, outfile)
        outfile.close()
    return
def xdSamples(parser):
    """Fit a global XD mixture to per-object posterior summaries (Python 2).

    Reads pickled variability-fit samples from the first positional
    argument, summarizes each object's samples by their mean vector and
    sample covariance (the XD "error" model), runs an options.k-component
    extreme-deconvolution fit, and pickles xamp/xmean/xcovar to
    options.outfilename.

    Parameters
    ----------
    parser : optparse parser; options used: outfilename, seed, k, savefits
    """
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if options.outfilename is None:
        print "-o filename options needs to be set ..."
        print "Returning ..."
        return None
    numpy.random.seed(seed=options.seed)
    # Restore samples
    savefilename = args[0]
    print "Reading data ..."
    if os.path.exists(savefilename):
        savefile = open(savefilename, 'rb')
        samples = pickle.load(savefile)
        # NOTE(review): `type` shadows the builtin for the rest of the function
        type = pickle.load(savefile)
        band = pickle.load(savefile)
        mean = pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    # Prepare samples for XD: dimensionality depends on the fit type
    print "Preparing data ..."
    if type == 'powerlawSF':
        if len(band) > 1:
            nparams = 4
        else:
            nparams = 2
    elif type == 'DRW':
        if len(band) == 1:
            nparams = 2
        else:
            print "DRW for multi-band fits not implemented yet ..."
            print "Returning ..."
            return
    elif type == 'KS11':
        nparams = 3
    elif type == 'scatter':
        nparams = 1
    ii = 0
    ndata = len(samples)
    ydata = numpy.zeros((ndata, nparams))
    ycovar = numpy.zeros((ndata, nparams, nparams))
    for key in samples.keys():
        if type == 'powerlawSF':
            # Stack as A,g,Ac,gc
            logAs, loggammas = [], []
            for sample in samples[key]:
                logAs.append(sample['logA'][0])
                loggammas.append(numpy.log(sample['gamma'][0]))
            logAs = numpy.array(logAs)
            loggammas = numpy.array(loggammas)
            # Per-object mean and sample covariance act as the XD error model
            ydata[ii, 0] = numpy.mean(logAs)
            ydata[ii, 1] = numpy.mean(loggammas)
            ycovar[ii, :, :] = numpy.cov(numpy.vstack((logAs, loggammas)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
                # NOTE(review): unreachable after the return above, and kIn /
                # params are undefined here — leftover code, kept as-is.
                kIn[:, 2] = numpy.array([p['logAgr'] for p in params.values()]).reshape(ndata)
                kIn[:, 3] = numpy.array([p['gammagr'] for p in params.values()]).reshape(ndata)
        elif type == 'DRW':
            # Stack as loga2, logl
            loga2s, logls = [], []
            for sample in samples[key]:
                loga2s.append(sample['loga2'][0])
                logls.append(sample['logl'][0])
            loga2s = numpy.array(loga2s)
            logls = numpy.array(logls)
            ydata[ii, 0] = numpy.mean(loga2s)
            ydata[ii, 1] = numpy.mean(logls)
            ycovar[ii, :, :] = numpy.cov(numpy.vstack((loga2s, logls)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
        elif type == 'KS11':
            print "type == 'KS11' not implemented yet ..."
            print "Returning ..."
            return
            # NOTE(review): unreachable after the return above — kept as-is.
            # Stack as A,g,s
            kIn[:, 0] = numpy.array([p['logA'] for p in params.values()]).reshape(ndata)
            kIn[:, 1] = numpy.array([p['gamma'] for p in params.values()]).reshape(ndata)
            kIn[:, 2] = numpy.array([p['s'] for p in params.values()]).reshape(ndata)
        ii += 1
    # Outlier rejection
    # if type == 'powerlawSF':
    #     indx= (ydata[:,0] > -7.21)
    #     ydata= ydata[indx,:]
    #     ycovar= ycovar[indx,:,:]
    # Initial parameters for XD
    print "Running XD ..."
    xamp = numpy.ones(options.k)/float(options.k)
    xmean = numpy.zeros((options.k, nparams))
    for kk in range(options.k):
        # Jitter initial means around the sample mean by one sigma
        xmean[kk, :] = numpy.mean(ydata, axis=0)\
            + numpy.random.normal()*numpy.std(ydata, axis=0)
    xcovar = numpy.zeros((options.k, nparams, nparams))
    for kk in range(options.k):
        xcovar[kk, :, :] = numpy.cov(ydata.T)
    # XD updates xamp/xmean/xcovar in place
    extreme_deconvolution(ydata, ycovar, xamp, xmean, xcovar)
    # Prepare for saving
    print "Preparing output for saving ..."
    # Save
    print "Saving ..."
    if os.path.exists(options.outfilename):
        print options.outfilename+" exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    if options.savefits:
        raise NotImplementedError("Fits saving not implemented yet")
        # NOTE(review): everything below this raise is unreachable dead code
        # (and references undefined names outparams/weights); kept as-is.
        import pyfits
        cols = []
        if type == 'powerlawSF':
            colA = []
            colg = []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            colw = numpy.log(numpy.array(weights))
            cols.append(pyfits.Column(name='logA', format='E',
                                      array=colA))
            cols.append(pyfits.Column(name='gamma', format='E',
                                      array=colg))
        elif type == 'KS11':
            colA = []
            colg = []
            cols = []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
                colg.append(outparams[kk]['s'])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            cols = numpy.array(colg)
            cols.append(pyfits.Column(name='logA', format='E',
                                      array=colA))
            cols.append(pyfits.Column(name='gamma', format='E',
                                      array=colg))
            cols.append(pyfits.Column(name='s', format='E',
                                      array=cols))
        colw = numpy.log(numpy.array(weights))
        cols.append(pyfits.Column(name='logweight', format='E',
                                  array=colw))
        columns = pyfits.ColDefs(cols)
        tbhdu = pyfits.new_table(columns)
        tbhdu.writeto(options.outfilename)
    else:
        # Pickle the fitted mixture
        outfile = open(options.outfilename, 'wb')
        pickle.dump(xamp, outfile)
        pickle.dump(xmean, outfile)
        pickle.dump(xcovar, outfile)
        outfile.close()
    return
constant = ngauss / 5 #print(constant) xamp1 = np.ones(ngauss) / (ngauss - constant) xamp2 = np.ones(ngauss) / (ngauss - constant) xmean = neurons[0:ngauss, :] xcovar = np.zeros([ngauss, dx, dx]) #print(np.shape(xcovar)) #xcovar = np.cov(neurons.T) for i in range(ngauss): xcovar[i][0][0] = 0.00044115 xcovar[i][1][1] = 0.00074033 xcovar[i][2][2] = 0.00216775 xcovar[i][3][3] = 0.0073491 t0 = time.time() l = extreme_deconvolution(ydata, ycovar, xamp1, xmean, xcovar, weight=weights) t1 = time.time() xdc_time = t1 - t0 filename = "threshold_" + str(new_ndata.shape[0]) with open(filename, 'w') as filehandle: filehandle.write("ndata: " + str(new_ndata.shape[0]) + '\n') filehandle.write("ngauss: " + str(ngauss) + '\n') filehandle.write("gng: " + str(gng_time) + '\n') filehandle.write("binning: " + str(bin_time) + '\n') filehandle.write("deconvolution: " + str(xdc_time) + '\n')
def xdSamples(parser):
    """Fit a global XD mixture to per-object posterior summaries (Python 2).

    NOTE(review): near-identical to another xdSamples definition earlier in
    this file (only whitespace differs); if both live in one module the
    later definition shadows the earlier — confirm whether this file is a
    concatenation of separate scripts.

    Reads pickled variability-fit samples from the first positional
    argument, summarizes each object's samples by their mean vector and
    sample covariance (the XD "error" model), runs an options.k-component
    extreme-deconvolution fit, and pickles xamp/xmean/xcovar to
    options.outfilename.

    Parameters
    ----------
    parser : optparse parser; options used: outfilename, seed, k, savefits
    """
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if options.outfilename is None:
        print "-o filename options needs to be set ..."
        print "Returning ..."
        return None
    numpy.random.seed(seed=options.seed)
    #Restore samples
    savefilename = args[0]
    print "Reading data ..."
    if os.path.exists(savefilename):
        savefile = open(savefilename, 'rb')
        samples = pickle.load(savefile)
        # NOTE(review): `type` shadows the builtin for the rest of the function
        type = pickle.load(savefile)
        band = pickle.load(savefile)
        mean = pickle.load(savefile)
        savefile.close()
    else:
        print "Input file does not exist ..."
        print "Returning ..."
        return
    #Prepare samples for XD: dimensionality depends on the fit type
    print "Preparing data ..."
    if type == 'powerlawSF':
        if len(band) > 1:
            nparams = 4
        else:
            nparams = 2
    elif type == 'DRW':
        if len(band) == 1:
            nparams = 2
        else:
            print "DRW for multi-band fits not implemented yet ..."
            print "Returning ..."
            return
    elif type == 'KS11':
        nparams = 3
    elif type == 'scatter':
        nparams = 1
    ii = 0
    ndata = len(samples)
    ydata = numpy.zeros((ndata, nparams))
    ycovar = numpy.zeros((ndata, nparams, nparams))
    for key in samples.keys():
        if type == 'powerlawSF':
            #Stack as A,g,Ac,gc
            logAs, loggammas = [], []
            for sample in samples[key]:
                logAs.append(sample['logA'][0])
                loggammas.append(numpy.log(sample['gamma'][0]))
            logAs = numpy.array(logAs)
            loggammas = numpy.array(loggammas)
            # Per-object mean and sample covariance act as the XD error model
            ydata[ii, 0] = numpy.mean(logAs)
            ydata[ii, 1] = numpy.mean(loggammas)
            ycovar[ii, :, :] = numpy.cov(numpy.vstack((logAs, loggammas)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
                # NOTE(review): unreachable after the return above, and kIn /
                # params are undefined here — leftover code, kept as-is.
                kIn[:, 2] = numpy.array([p['logAgr'] for p in params.values()]).reshape(ndata)
                kIn[:, 3] = numpy.array([p['gammagr'] for p in params.values()]).reshape(ndata)
        elif type == 'DRW':
            #Stack as loga2, logl
            loga2s, logls = [], []
            for sample in samples[key]:
                loga2s.append(sample['loga2'][0])
                logls.append(sample['logl'][0])
            loga2s = numpy.array(loga2s)
            logls = numpy.array(logls)
            ydata[ii, 0] = numpy.mean(loga2s)
            ydata[ii, 1] = numpy.mean(logls)
            ycovar[ii, :, :] = numpy.cov(numpy.vstack((loga2s, logls)))
            if len(band) > 1:
                print "Multi-band not supported currently"
                print "Returning ..."
                return
        elif type == 'KS11':
            print "type == 'KS11' not implemented yet ..."
            print "Returning ..."
            return
            # NOTE(review): unreachable after the return above — kept as-is.
            #Stack as A,g,s
            kIn[:, 0] = numpy.array([p['logA'] for p in params.values()]).reshape(ndata)
            kIn[:, 1] = numpy.array([p['gamma'] for p in params.values()]).reshape(ndata)
            kIn[:, 2] = numpy.array([p['s'] for p in params.values()]).reshape(ndata)
        ii += 1
    #Outlier rejection
    #if type == 'powerlawSF':
    #    indx= (ydata[:,0] > -7.21)
    #    ydata= ydata[indx,:]
    #    ycovar= ycovar[indx,:,:]
    #Initial parameters for XD
    print "Running XD ..."
    xamp = numpy.ones(options.k) / float(options.k)
    xmean = numpy.zeros((options.k, nparams))
    for kk in range(options.k):
        # Jitter initial means around the sample mean by one sigma
        xmean[kk,:]= numpy.mean(ydata,axis=0)\
            +numpy.random.normal()*numpy.std(ydata,axis=0)
    xcovar = numpy.zeros((options.k, nparams, nparams))
    for kk in range(options.k):
        xcovar[kk, :, :] = numpy.cov(ydata.T)
    # XD updates xamp/xmean/xcovar in place
    extreme_deconvolution(ydata, ycovar, xamp, xmean, xcovar)
    #Prepare for saving
    print "Preparing output for saving ..."
    #Save
    print "Saving ..."
    if os.path.exists(options.outfilename):
        print options.outfilename + " exists ..."
        print "*Not* overwriting ..."
        print "Remove file before running ..."
        return
    if options.savefits:
        raise NotImplementedError("Fits saving not implemented yet")
        # NOTE(review): everything below this raise is unreachable dead code
        # (and references undefined names outparams/weights); kept as-is.
        import pyfits
        cols = []
        if type == 'powerlawSF':
            colA = []
            colg = []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            colw = numpy.log(numpy.array(weights))
            cols.append(pyfits.Column(name='logA', format='E', array=colA))
            cols.append(pyfits.Column(name='gamma', format='E', array=colg))
        elif type == 'KS11':
            colA = []
            colg = []
            cols = []
            for kk in range(options.k):
                colA.append(outparams[kk]['logA'])
                colg.append(outparams[kk]['gamma'])
                colg.append(outparams[kk]['s'])
            colA = numpy.array(colA)
            colg = numpy.array(colg)
            cols = numpy.array(colg)
            cols.append(pyfits.Column(name='logA', format='E', array=colA))
            cols.append(pyfits.Column(name='gamma', format='E', array=colg))
            cols.append(pyfits.Column(name='s', format='E', array=cols))
        colw = numpy.log(numpy.array(weights))
        cols.append(pyfits.Column(name='logweight', format='E', array=colw))
        columns = pyfits.ColDefs(cols)
        tbhdu = pyfits.new_table(columns)
        tbhdu.writeto(options.outfilename)
    else:
        # Pickle the fitted mixture
        outfile = open(options.outfilename, 'wb')
        pickle.dump(xamp, outfile)
        pickle.dump(xmean, outfile)
        pickle.dump(xcovar, outfile)
        outfile.close()
    return