def test_qp_file_writing():
    """Write two normal-distribution ensembles in chunks and check the outputs.

    Two ``qp.Ensemble`` objects sharing the same scales are written as chunk
    0 and chunk 1 through ``write_qp_output_chunk``; ``qp_reformat_output`` is
    then run for the two chunks.  The test passes when both the final file
    and its companion ``_meta`` file exist, after which both are deleted.
    """
    outfile = "./fin_qp.hdf5"
    tmpfile = "./tmp_qp.hdf5"
    metafile = "./fin_qp_meta.hdf5"

    scales = np.array([0.1, 0.2, 0.5])
    first_locs = np.array([1.0, 1.0, 2.0])
    second_locs = np.array([1.2, 1.9, 1.8])
    chunks = (
        qp.Ensemble(qp.stats.norm, data=dict(loc=first_locs, scale=scales)),
        qp.Ensemble(qp.stats.norm, data=dict(loc=second_locs, scale=scales)),
    )

    _ = initialize_qp_output(outfile)
    for chunk_index, ensemble in enumerate(chunks):
        write_qp_output_chunk(tmpfile, outfile, ensemble, chunk_index)
    qp_reformat_output(tmpfile, outfile, len(chunks))

    produced = (outfile, metafile)
    # Check everything first, then clean up, so a missing file is reported
    # before anything is removed.
    for path in produced:
        assert os.path.exists(path)
    for path in produced:
        os.remove(path)
def estimate(self, test_data):
    """Predict gridded redshift PDFs for ``test_data`` with ``self.model``.

    The input is converted with ``make_color_data`` and passed to
    ``self.model.predict`` together with ``n_grid=self.nzbins``.  The
    returned grid is flattened and stored on ``self.zgrid``.

    Returns
    -------
    qp.Ensemble or dict
        When ``self.output_format == 'qp'``, an interpolated ensemble built
        from the grid and the predicted PDFs.  Otherwise a dict with
        ``'zmode'`` (grid value at each PDF's maximum) and ``'pz_pdf'``
        (the PDFs exactly as returned by the model).
    """
    colors = make_color_data(test_data)
    pdfs, z_grid = self.model.predict(colors, n_grid=self.nzbins)
    self.zgrid = np.array(z_grid).flatten()

    if self.output_format != 'qp':
        # Mode = grid point where each individual PDF peaks.
        peaks = [self.zgrid[np.argmax(row)] for row in pdfs]
        return {'zmode': np.array(peaks).flatten(), 'pz_pdf': pdfs}

    return qp.Ensemble(qp.interp, data=dict(xvals=self.zgrid, yvals=pdfs))
def estimate(self, test_data):
    """Assign the stored training PDF to every object in ``test_data``.

    The number of objects is taken from ``len(test_data['id'])``.  Each
    object receives a copy of ``self.train_pdf`` and the mode ``self.zmode``.

    Returns
    -------
    qp.Ensemble or dict
        When ``self.output_format == 'qp'``, an interpolated ensemble on
        ``self.zgrid`` with one row of ``self.train_pdf`` per object.
        Otherwise a dict with ``'zmode'`` (``self.zmode`` repeated) and
        ``'pz_pdf'`` (``self.train_pdf`` tiled, one row per object).
    """
    n_objects = len(test_data['id'])
    zmode = np.repeat(self.zmode, n_objects)
    stacked_pdf = np.tile(self.train_pdf, (n_objects, 1))

    if self.output_format != 'qp':
        return {'zmode': zmode, 'pz_pdf': stacked_pdf}

    return qp.Ensemble(qp.interp,
                       data=dict(xvals=self.zgrid, yvals=stacked_pdf))
def estimate(self, test_data):
    """Turn the model's point estimates into Gaussian redshift PDFs.

    ``test_data`` is passed through ``make_color_data`` and
    ``regularize_data`` before ``self.model.predict``; the predictions are
    rounded to three decimals and used as the Gaussian centres.  Each width
    is ``self.width * (1 + zmode)``.

    Returns
    -------
    qp.Ensemble or dict
        When ``self.output_format == 'qp'``, a ``qp.stats.norm`` ensemble
        parameterised by the centres and widths.  Otherwise a dict with
        ``'zmode'`` and ``'pz_pdf'``, the latter a list of Gaussians
        evaluated on ``self.zgrid``.  ``self.zgrid`` is (re)built from
        ``zmin``/``zmax``/``nzbins`` only on this second path.
    """
    features = regularize_data(make_color_data(test_data))
    zmode = np.round(self.model.predict(features), 3)
    widths = self.width * (1.0 + zmode)

    if self.output_format == 'qp':
        return qp.Ensemble(qp.stats.norm, data=dict(loc=zmode, scale=widths))

    self.zgrid = np.linspace(self.zmin, self.zmax, self.nzbins)
    pdfs = [norm.pdf(self.zgrid, centre, sigma)
            for centre, sigma in zip(zmode, widths)]
    return {'zmode': zmode, 'pz_pdf': pdfs}
def main(argv):
    """Compute the CDE loss of the ingested PDFs and write it to a file.

    Data come from ``ingdata.ingestflexzdata()``.  A gridded ``qp.Ensemble``
    and an ``inmet.EvaluateMetric`` object are built from them, the CDE loss
    is evaluated on a 1000-point grid over [0, 10], and the value is printed
    and written to ``CDE_STATS.out``.  Per-step wall-clock timings go to
    stdout.

    Parameters
    ----------
    argv : sequence
        Accepted for the usual entry-point signature; not used.
    """
    def _lap(since):
        # Report the time elapsed since `since` and return the new mark.
        now = time.time()
        print("took %g seconds" % (now - since))
        return now

    outfile = "CDE_STATS.out"
    mark = time.time()

    # Open the output first (as before) so an unwritable path fails before
    # the expensive work; `with` guarantees the handle is closed on error.
    with open(outfile, "w") as outfp:
        z_array, ID, szs, mags, pzs = ingdata.ingestflexzdata()

        print("making Ensemble...")
        approx_pdf = qp.Ensemble(pzs.shape[0], gridded=(z_array, pzs),
                                 procs=3)
        mark = _lap(mark)

        print("making EvaluateMetric Object")
        bpzobj = inmet.EvaluateMetric(approx_pdf, szs)
        mark = _lap(mark)

        # The PIT step that used to sit here is disabled; its timing line is
        # kept so the stdout sequence is unchanged.
        mark = _lap(mark)

        print("calculating cdeloss...")
        tmpxgrid = np.linspace(0.0, 10.0, 1000)
        cde_loss = bpzobj.cde_loss(tmpxgrid)
        print("CDE loss: %g\n" % cde_loss)
        outfp.write("CDE LOSS:\n%.6g\n" % (cde_loss))

    mark = _lap(mark)
    print("finished\n")
def load_gridded(catalog_file_name, pz_file_name, z_spec_col, z_min, z_max,
                 z_step):
    """Load PDFs that are sampled on a regular redshift grid.

    Intended for the output of codes such as LePHARE and BPZ, which sample
    their PDFs at regular grid points.

    Parameters
    ----------
    catalog_file_name : str
        Text catalog from which the spectroscopic redshifts are read.
    pz_file_name : str
        Text file holding the gridded PDF values.
    z_spec_col : int
        Column of the catalog holding the spectroscopic redshift (passed to
        ``np.loadtxt`` as ``usecols``).
    z_min : float
        Minimum redshift of the PDF grid.
    z_max : float
        Maximum redshift of the PDF grid; the grid is built so that this
        end point is included.
    z_step : float
        Redshift step of the PDF grid.

    Returns
    -------
    tuple
        ``(estimated_pdfs, true_pdf)``: a ``qp.Ensemble`` built from the
        gridded PDFs, and a ``qp.PDF`` built from the spectroscopic
        redshifts used as samples.
    """
    # Extending the stop value by half a step makes z_max itself a grid point.
    grid_stop = z_max + z_step / 2.
    z_array = np.arange(z_min, grid_stop, z_step)

    spec_redshifts = np.loadtxt(catalog_file_name, usecols=z_spec_col)
    pdf_table = np.loadtxt(pz_file_name)

    # "True" distribution: the spectroscopic redshifts taken as samples.
    true_pdf = qp.PDF(samples=spec_redshifts)

    # One ensemble holding every estimated PDF (one per row of the table).
    n_pdfs = pdf_table.shape[0]
    estimated_pdfs = qp.Ensemble(n_pdfs, gridded=(z_array, pdf_table))

    return estimated_pdfs, true_pdf
def estimate(self, test_data):
    """Draw a random Gaussian redshift PDF for every object in ``test_data``.

    The number of objects is the length of the ``'i_mag'`` column, or of
    ``'mag_i_lsst'`` when the first lookup fails.  Each mode is drawn
    uniformly from ``[0, self.zmax)`` with ``np.random.uniform`` and rounded
    to three decimals; each width is ``self.width * (1 + zmode)``.
    ``self.zgrid`` is always (re)built as ``nzbins`` points on
    ``[0, self.zmax]``.

    Returns
    -------
    qp.Ensemble or dict
        When ``self.output_format == 'qp'``, a ``qp.stats.norm`` ensemble
        parameterised by the modes and widths.  Otherwise a dict with
        ``'zmode'`` and ``'pz_pdf'``, the latter a list holding one Gaussian
        per object evaluated on ``self.zgrid``.
    """
    # Allow for either magnitude column name for now.  The catch stays broad
    # because the container type (and so the lookup error) is not fixed here.
    try:
        mags = test_data['i_mag']
    except Exception:
        mags = test_data['mag_i_lsst']

    numzs = len(mags)
    zmode = np.round(np.random.uniform(0.0, self.zmax, numzs), 3)
    widths = self.width * (1.0 + zmode)
    self.zgrid = np.linspace(0., self.zmax, self.nzbins)

    if self.output_format == 'qp':
        return qp.Ensemble(qp.stats.norm, data=dict(loc=zmode, scale=widths))

    # Evaluate on the grid only here: the qp output is parametric and never
    # used these arrays.
    pdf = [norm.pdf(self.zgrid, centre, sigma)
           for centre, sigma in zip(zmode, widths)]
    return {'zmode': zmode, 'pz_pdf': pdf}
def main(argv):
    """Compute PIT values, QQ vectors and KS/CvM/AD statistics, and save them.

    Data come from ``ingdata.ingestflexzdata()``.  A gridded ``qp.Ensemble``
    and an ``inmet.EvaluateMetric`` object are built from them, then three
    text files are written to the working directory:

    * ``TESTPITVALS.out`` -- one ``ID PIT`` row per object,
    * ``TESTQQvectors.out`` -- the theoretical and data quantiles,
    * ``TEST_STATS_KSCVMAD.out`` -- KS, CvM and AD statistics with p-values,
      the AD test being repeated for three ``vmin``/``vmax`` ranges.

    Parameters
    ----------
    argv : sequence
        Accepted for the usual entry-point signature; not used.
    """
    def _lap(since):
        # Report the time elapsed since `since` and return the new mark.
        now = time.time()
        print("took %g seconds" % (now - since))
        return now

    mark = time.time()
    z_array, ID, szs, mags, pzs = ingdata.ingestflexzdata()

    print("making Ensemble...")
    approx_pdf = qp.Ensemble(pzs.shape[0], gridded=(z_array, pzs), procs=3)
    mark = _lap(mark)

    print("making EvaluateMetric Object")
    bpzobj = inmet.EvaluateMetric(approx_pdf, szs)
    mark = _lap(mark)

    print("calculating PIT vals...")
    # Return value unused; bpzobj.pitarray is read below and is presumably
    # filled in by this call -- TODO confirm.
    bpzobj.PIT()
    mark = _lap(mark)

    # Write the PIT values to file.
    with open("TESTPITVALS.out", "w") as outfp:
        outfp.write("#ID PIT\n")
        for i, obj_id in enumerate(ID):
            outfp.write("%d %0.5f\n" % (obj_id, bpzobj.pitarray[i]))

    # QQ plot vectors.
    print("making QQ plot...")
    qq_qtheory, qq_qdata = bpzobj.QQvectors(using='gridded', dx=0.0001,
                                            Nquants=1001)
    with open("TESTQQvectors.out", "w") as outfp:
        outfp.write("#qtheory qdata\n")
        for i, qtheory in enumerate(qq_qtheory):
            outfp.write("%0.6f %0.6f\n" % (qtheory, qq_qdata[i]))

    # All summary statistics.  The file is now closed (and flushed)
    # explicitly; the original never closed this handle.
    with open("TEST_STATS_KSCVMAD.out", "w") as outfp:
        ksstat, kspval = bpzobj.KS(using='gridded', dx=0.0001)
        outfp.write("KSval: %.6g\n" % (ksstat))
        outfp.write("KSpval: %.6g\n" % (kspval))

        cvmstat, cvmpval = bpzobj.CvM(using='gridded', dx=0.0001)
        outfp.write("CvMval: %.6g\n" % (cvmstat))
        outfp.write("Cvmpval: %.6g\n" % (cvmpval))

        # Same AD test over three ranges, in the original order.
        for vmn, vmx in ((0.05, 0.95), (0.1, 0.9), (0.01, 0.99)):
            adstat, adpval = bpzobj.AD(using='gridded', dx=0.0001,
                                       vmin=vmn, vmax=vmx)
            outfp.write("ADval for vmin/vmax=%.3f %.3f: %.6g\n"
                        % (vmn, vmx, adstat))
            outfp.write("ADpval: %.6g\n" % (adpval))

    print("finished\n")
def main(argv):
    """Compare the stacked photo-z N(z) with the spec-z N(z) and save results.

    Data come from ``ingdata.ingestflexzdata()``.  A gridded ``qp.Ensemble``
    and an ``inmet.NzSumEvaluateMetric`` object are built from them.  Both
    N(z) curves are evaluated on a grid from 0 to 2 in steps of 0.001 and
    written to ``NZPLOT_vectors.out``.  KS, CvM and AD statistics (the AD
    test over the spec-z range and over [0, 2]) are printed and written to
    ``NZ_STATS_KSCVMAD.out``.  Per-step wall-clock timings go to stdout.

    Parameters
    ----------
    argv : sequence
        Accepted for the usual entry-point signature; not used.
    """
    def _lap(since):
        # Report the time elapsed since `since` and return the new mark.
        now = time.time()
        print("took %g seconds" % (now - since))
        return now

    mark = time.time()
    # Everything below needs these arrays, so the ingest call must be live.
    z_array, ID, szs, mags, pzs = ingdata.ingestflexzdata()

    print("making Ensemble...")
    approx_pdf = qp.Ensemble(pzs.shape[0], gridded=(z_array, pzs), procs=3)
    mark = _lap(mark)

    print("making NzSumEvaluateMetric Object, with stacking...")
    nzobj = inmet.NzSumEvaluateMetric(approx_pdf, szs, eval_grid=z_array,
                                      using='gridded', dx=0.0001)
    mark = _lap(mark)

    print("calculating Nz sum vectors...")
    newgrid = np.arange(0.0, 2.0001, 0.001)
    # qp object built from the spec-z values used as samples.
    szsamplepdf = qp.PDF(samples=szs)
    # evaluate() returns a tuple; only its second element is kept.
    specznz = szsamplepdf.evaluate(newgrid, using='samples', vb=True,
                                   norm=False)[1]
    photznz = nzobj.stackpz.evaluate(newgrid, using='gridded', vb=True,
                                     norm=False)[1]
    with open("NZPLOT_vectors.out", "w") as outfp:
        outfp.write("#z_array speczNz photzNz\n")
        for i, z_val in enumerate(newgrid):
            outfp.write("%f %g %g\n" % (z_val, specznz[i], photznz[i]))
    mark = _lap(mark)

    print("calculating KS stat...")
    ks_stat, ks_pval = nzobj.NZKS()
    print("ks_stat: %g\nks_pval: %g\n" % (ks_stat, ks_pval))
    mark = _lap(mark)

    cvm_stat, cvm_pval = nzobj.NZCVM()
    # "\n" replaces the original "\c", which is not a valid escape.
    print("cvm_stat: %g\ncvm_pval: %g\n" % (cvm_stat, cvm_pval))
    mark = _lap(mark)

    zmin = min(szs)
    zmax = max(szs)
    delv = (zmax - zmin) / 200.
    ad_stat, ad_pval = nzobj.NZAD(vmin=zmin, vmax=zmax, delv=delv)
    # "\n" replaces the original "\a", which emitted a BEL character and ate
    # the leading "a" of "ad_pval".
    print("ad_stat: %g\nad_pval: %g\n" % (ad_stat, ad_pval))
    mark = _lap(mark)

    ad_statx, ad_pvalx = nzobj.NZAD(vmin=0.0, vmax=2.0, delv=0.01)
    print("ad_stat full range: %g\nad_pval: %g\n" % (ad_statx, ad_pvalx))
    mark = _lap(mark)

    # All summary statistics.
    with open("NZ_STATS_KSCVMAD.out", "w") as outfp:
        outfp.write("KSval: %.6g\n" % (ks_stat))
        outfp.write("KSpval: %.6g\n" % (ks_pval))
        outfp.write("CvMval: %.6g\n" % (cvm_stat))
        outfp.write("Cvmpval: %.6g\n" % (cvm_pval))
        outfp.write("ADval for vmin/vmax=%.3f %.3f: %.6g\n"
                    % (zmin, zmax, ad_stat))
        outfp.write("ADpval: %.6g\n" % (ad_pval))
        outfp.write("ADval for vmin/vmax=0.0/2.0: %.6g\n" % (ad_statx))
        outfp.write("ADpval: %.6g\n" % (ad_pvalx))

    print("finished\n")