示例#1
0
def test_qp_file_writing():
    """Write two qp ensembles in chunks and check the final files appear.

    Two three-object ``qp.stats.norm`` ensembles are written as chunks 0 and 1
    through ``write_qp_output_chunk`` and then merged by
    ``qp_reformat_output``.  The test passes when both the output file and its
    ``_meta`` companion exist afterwards.

    All files are created in the current working directory.  They are removed
    before the run (so a leftover file from an earlier, failed run cannot
    satisfy the existence checks) and again afterwards, whether or not the
    assertions pass.
    """
    outfile = "./fin_qp.hdf5"
    tmpfile = "./tmp_qp.hdf5"
    metafile = "./fin_qp_meta.hdf5"
    artefacts = (outfile, tmpfile, metafile)

    def _remove_existing(paths):
        # Delete whichever of the given files are currently present.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

    locs = np.array([1.0, 1.0, 2.0])
    locs2 = np.array([1.2, 1.9, 1.8])
    scales = np.array([0.1, 0.2, 0.5])
    ens = qp.Ensemble(qp.stats.norm, data=dict(loc=locs, scale=scales))
    ens2 = qp.Ensemble(qp.stats.norm, data=dict(loc=locs2, scale=scales))
    num_chunks = 2

    _remove_existing(artefacts)
    try:
        _ = initialize_qp_output(outfile)
        write_qp_output_chunk(tmpfile, outfile, ens, 0)
        write_qp_output_chunk(tmpfile, outfile, ens2, 1)
        qp_reformat_output(tmpfile, outfile, num_chunks)
        assert os.path.exists(outfile)
        assert os.path.exists(metafile)
    finally:
        _remove_existing(artefacts)
示例#2
0
 def estimate(self, test_data):
     """Predict gridded PDFs for `test_data` with the stored model.

     The input is converted with ``make_color_data`` and handed to
     ``self.model.predict`` together with ``n_grid=self.nzbins``.  The grid
     returned by the model is flattened and stored on ``self.zgrid``.

     Returns
     -------
     qp.Ensemble or dict
         When ``self.output_format == 'qp'``, a ``qp.interp`` ensemble built
         from the grid and the predicted PDFs.  Otherwise a dict with
         ``'zmode'`` (the grid value at each PDF's maximum) and ``'pz_pdf'``
         (the PDFs exactly as returned by the model).
     """
     colors = make_color_data(test_data)
     densities, grid = self.model.predict(colors, n_grid=self.nzbins)
     self.zgrid = np.array(grid).flatten()

     if self.output_format != 'qp':
         # Mode = grid point where each individual PDF peaks.
         peak_z = [self.zgrid[np.argmax(row)] for row in densities]
         return {'zmode': np.array(peak_z).flatten(), 'pz_pdf': densities}

     return qp.Ensemble(qp.interp,
                        data=dict(xvals=self.zgrid, yvals=densities))
示例#3
0
 def estimate(self, test_data):
     """Assign the stored training PDF to every object in `test_data`.

     The number of objects is ``len(test_data['id'])``; ``self.train_pdf`` is
     tiled once per object.

     Returns
     -------
     qp.Ensemble or dict
         When ``self.output_format == 'qp'``, a ``qp.interp`` ensemble on
         ``self.zgrid`` holding the tiled PDF.  Otherwise a dict with
         ``'zmode'`` (``self.zmode`` repeated once per object) and
         ``'pz_pdf'`` (the tiled PDF array).
     """
     n_obj = len(test_data['id'])
     tiled_pdf = np.tile(self.train_pdf, (n_obj, 1))

     if self.output_format != 'qp':
         return {'zmode': np.repeat(self.zmode, n_obj), 'pz_pdf': tiled_pdf}

     return qp.Ensemble(qp.interp,
                        data=dict(xvals=self.zgrid, yvals=tiled_pdf))
示例#4
0
    def estimate(self, test_data):
        """Return a Gaussian PDF centred on the model's point prediction.

        `test_data` is passed through ``make_color_data`` and
        ``regularize_data`` before ``self.model.predict``; the prediction is
        rounded to three decimals and used as the mode.  Each Gaussian has
        standard deviation ``self.width * (1 + zmode)``.

        Returns
        -------
        qp.Ensemble or dict
            When ``self.output_format == 'qp'``, a ``qp.stats.norm`` ensemble
            with ``loc=zmode`` and ``scale=widths``.  Otherwise
            ``self.zgrid`` is set to ``nzbins`` points between ``self.zmin``
            and ``self.zmax`` and a dict is returned with ``'zmode'`` and
            ``'pz_pdf'`` (a list holding one Gaussian per object evaluated on
            that grid).
        """
        features = regularize_data(make_color_data(test_data))
        zmode = np.round(self.model.predict(features), 3)
        sigmas = self.width * (1.0 + zmode)

        if self.output_format == 'qp':
            return qp.Ensemble(qp.stats.norm,
                               data=dict(loc=zmode, scale=sigmas))

        # The evaluation grid is only (re)built for the dictionary output.
        self.zgrid = np.linspace(self.zmin, self.zmax, self.nzbins)
        curves = [norm.pdf(self.zgrid, center, sigmas[idx])
                  for idx, center in enumerate(zmode)]
        return {'zmode': zmode, 'pz_pdf': curves}
def main(argv):
    """Compute the CDE loss of the ingested PDFs and save it to a file.

    Steps: ingest the gridded PDFs via ``ingdata.ingestflexzdata()``, wrap
    them in a ``qp.Ensemble``, build an ``inmet.EvaluateMetric`` object, and
    evaluate ``cde_loss`` on a 1000-point grid over [0, 10].  The loss is
    printed and written to ``CDE_STATS.out``.  Elapsed wall-clock time is
    printed after each step.

    Parameters
    ----------
    argv : list
        Accepted for the usual script entry-point signature; not used.
    """
    def _lap(previous):
        # Report the time elapsed since `previous`; return the new time mark.
        now = time.time()
        print("took %g seconds" % (now - previous))
        return now

    outfile = "CDE_STATS.out"
    # The first timing report covers ingestion plus Ensemble construction.
    mark = time.time()
    z_array, ID, szs, mags, pzs = ingdata.ingestflexzdata()

    print("making Ensemble...")
    approx_pdf = qp.Ensemble(pzs.shape[0], gridded=(z_array, pzs), procs=3)
    mark = _lap(mark)

    print("making EvaluateMetric Object")
    bpzobj = inmet.EvaluateMetric(approx_pdf, szs)
    mark = _lap(mark)

    print("calculating cdeloss...")
    tmpxgrid = np.linspace(0.0, 10.0, 1000)
    cde_loss = bpzobj.cde_loss(tmpxgrid)
    print("CDE loss: %g\n" % cde_loss)

    # Open the file only once there is a result to write, and let the context
    # manager close it even if the write fails.
    with open(outfile, "w") as outfp:
        outfp.write("CDE LOSS:\n%.6g\n" % (cde_loss))

    _lap(mark)
    print("finished\n")
示例#6
0
def load_gridded(catalog_file_name, pz_file_name, z_spec_col,
                 z_min, z_max, z_step):
    """Load PDFs that are sampled on a regular redshift grid.

    Intended for the output of codes such as LePHARE and BPZ, which sample
    their PDFs at regularly spaced grid points.

    Parameters
    ----------
    catalog_file_name : str
        Name of the catalog file containing z_estimated and z_spec.
    pz_file_name : str
        Name of the file containing the gridded PDF values.
    z_spec_col : int
        Column number of the spectroscopic redshift in the catalog file.
    z_min : float
        Minimum redshift of the PDF grid.
    z_max : float
        Maximum redshift of the PDF grid; treated as inclusive.
    z_step : float
        Step size in redshift of the PDF grid.

    Returns
    -------
    tuple
        ``(estimated_pdfs, true_pdf)``: a ``qp.Ensemble`` built from the
        gridded PDFs in `pz_file_name`, and a ``qp.PDF`` of the true N(z)
        built from the spectroscopic redshifts used as samples.
    """
    # Read both input tables first.
    spec_redshifts = np.loadtxt(catalog_file_name, usecols=z_spec_col)
    pdf_table = np.loadtxt(pz_file_name)

    # Half a step is added to the upper limit so that z_max itself ends up
    # on the grid.
    grid = np.arange(z_min, z_max + z_step / 2., z_step)

    # "True" distribution, represented by the spectroscopic samples.
    truth = qp.PDF(samples=spec_redshifts)

    # A single ensemble holding all of the estimated PDFs.
    ensemble = qp.Ensemble(pdf_table.shape[0], gridded=(grid, pdf_table))

    return (ensemble, truth)
示例#7
0
 def estimate(self, test_data):
     """Return a Gaussian PDF around a uniformly random redshift per object.

     The number of objects is the length of the ``'i_mag'`` column of
     `test_data`, or of ``'mag_i_lsst'`` when ``'i_mag'`` cannot be read.
     Modes are drawn from ``np.random.uniform(0, self.zmax)`` and rounded to
     three decimals; each Gaussian has standard deviation
     ``self.width * (1 + zmode)``.  ``self.zgrid`` is always reset to
     ``self.nzbins`` points on ``[0, self.zmax]``.

     Returns
     -------
     qp.Ensemble or dict
         When ``self.output_format == 'qp'``, a ``qp.stats.norm`` ensemble
         with ``loc=zmode`` and ``scale=widths``.  Otherwise a dict with
         ``'zmode'`` and ``'pz_pdf'`` (a list holding one Gaussian per object
         evaluated on ``self.zgrid``).
     """
     # allow for either format for now
     # NOTE(review): the catch is deliberately left broad because the
     # container type of test_data is not visible here -- narrow it once the
     # accepted input types are confirmed.
     try:
         mags = test_data['i_mag']
     except Exception:
         mags = test_data['mag_i_lsst']
     n_obj = len(mags)

     zmode = np.round(np.random.uniform(0.0, self.zmax, n_obj), 3)
     widths = self.width * (1.0 + zmode)
     self.zgrid = np.linspace(0., self.zmax, self.nzbins)

     if self.output_format == 'qp':
         # The parametric ensemble needs no gridded evaluation, so the
         # per-object PDFs are not computed on this path.
         return qp.Ensemble(qp.stats.norm,
                            data=dict(loc=zmode, scale=widths))

     pdf = [norm.pdf(self.zgrid, zmode[i], widths[i]) for i in range(n_obj)]
     return {'zmode': zmode, 'pz_pdf': pdf}
def main(argv):
    """Compute PIT-based metrics for the ingested PDFs and write them out.

    Steps: ingest the gridded PDFs via ``ingdata.ingestflexzdata()``, wrap
    them in a ``qp.Ensemble``, build an ``inmet.EvaluateMetric`` object and
    call its ``PIT``, ``QQvectors``, ``KS``, ``CvM`` and ``AD`` methods.

    Files written to the current directory:

    * ``TESTPITVALS.out`` -- one ``ID PIT`` line per object.
    * ``TESTQQvectors.out`` -- the theoretical and data quantile vectors.
    * ``TEST_STATS_KSCVMAD.out`` -- KS and CvM statistics and p-values, then
      AD statistics and p-values for three ``vmin``/``vmax`` ranges.

    Parameters
    ----------
    argv : list
        Accepted for the usual script entry-point signature; not used.
    """
    def _lap(previous):
        # Report the time elapsed since `previous`; return the new time mark.
        now = time.time()
        print("took %g seconds" % (now - previous))
        return now

    # The first timing report covers ingestion plus Ensemble construction.
    mark = time.time()
    z_array, ID, szs, mags, pzs = ingdata.ingestflexzdata()

    print("making Ensemble...")
    approx_pdf = qp.Ensemble(pzs.shape[0], gridded=(z_array, pzs), procs=3)
    mark = _lap(mark)

    print("making EvaluateMetric Object")
    bpzobj = inmet.EvaluateMetric(approx_pdf, szs)
    mark = _lap(mark)

    print("calculating PIT vals...")
    # The return value is not needed: the per-object values are read from
    # bpzobj.pitarray below (presumably filled in by this call -- confirm
    # against inmet.EvaluateMetric).
    bpzobj.PIT()
    _lap(mark)

    # write to file
    with open("TESTPITVALS.out", "w") as outfp:
        outfp.write("#ID PIT\n")
        for i, obj_id in enumerate(ID):
            outfp.write("%d %0.5f\n" % (obj_id, bpzobj.pitarray[i]))

    # QQplot
    print("making QQ plot...")
    qq_qtheory, qq_qdata = bpzobj.QQvectors(using='gridded',
                                            dx=0.0001,
                                            Nquants=1001)
    with open("TESTQQvectors.out", "w") as outfp:
        outfp.write("#qtheory qdata\n")
        for i, q_theory in enumerate(qq_qtheory):
            outfp.write("%0.6f %0.6f\n" % (q_theory, qq_qdata[i]))

    # all stats
    # (vmin, vmax) ranges over which the AD statistic is evaluated, in the
    # order they are reported.
    ad_ranges = ((0.05, 0.95), (0.1, 0.9), (0.01, 0.99))
    with open("TEST_STATS_KSCVMAD.out", "w") as outfp:
        ksstat, kspval = bpzobj.KS(using='gridded', dx=0.0001)
        outfp.write("KSval: %.6g\n" % (ksstat))
        outfp.write("KSpval: %.6g\n" % (kspval))

        cvmstat, cvmpval = bpzobj.CvM(using='gridded', dx=0.0001)
        outfp.write("CvMval: %.6g\n" % (cvmstat))
        outfp.write("Cvmpval: %.6g\n" % (cvmpval))

        for vmn, vmx in ad_ranges:
            adstat, adpval = bpzobj.AD(using='gridded', dx=0.0001,
                                       vmin=vmn, vmax=vmx)
            outfp.write("ADval for vmin/vmax=%.3f %.3f: %.6g\n"
                        % (vmn, vmx, adstat))
            outfp.write("ADpval: %.6g\n" % (adpval))

    print("finished\n")
def main(argv):
    """Compute stacked-N(z) metrics for the ingested PDFs and write them out.

    Steps: ingest the gridded PDFs via ``ingdata.ingestflexzdata()``, wrap
    them in a ``qp.Ensemble``, build an ``inmet.NzSumEvaluateMetric`` object,
    evaluate the spec-z sample PDF and the stacked photo-z PDF on a grid
    from 0 to 2 in steps of 0.001, and compute the KS, CvM and AD
    statistics (AD over both the spec-z range and the fixed range 0-2).

    Files written to the current directory:

    * ``NZPLOT_vectors.out`` -- grid, spec-z N(z) and photo-z N(z) columns.
    * ``NZ_STATS_KSCVMAD.out`` -- all statistics and their p-values.

    Parameters
    ----------
    argv : list
        Accepted for the usual script entry-point signature; not used.
    """
    def _lap(previous):
        # Report the time elapsed since `previous`; return the new time mark.
        now = time.time()
        print("took %g seconds" % (now - previous))
        return now

    # The first timing report covers ingestion plus Ensemble construction.
    mark = time.time()
    z_array, ID, szs, mags, pzs = ingdata.ingestflexzdata()

    print("making Ensemble...")
    approx_pdf = qp.Ensemble(pzs.shape[0], gridded=(z_array, pzs), procs=3)
    mark = _lap(mark)

    print("making NzSumEvaluateMetric Object, with stacking...")
    nzobj = inmet.NzSumEvaluateMetric(approx_pdf,
                                      szs,
                                      eval_grid=z_array,
                                      using='gridded',
                                      dx=0.0001)
    mark = _lap(mark)

    print("calculating Nz sum vectors...")
    newgrid = np.arange(0.0, 2.0001, 0.001)
    # create qp object of samples from the spec-z sample
    szsamplepdf = qp.PDF(samples=szs)
    # evaluate() returns a tuple; only its second element is kept.
    specznz = szsamplepdf.evaluate(newgrid, using='samples', vb=True,
                                   norm=False)[1]
    photznz = nzobj.stackpz.evaluate(newgrid, using='gridded', vb=True,
                                     norm=False)[1]
    with open("NZPLOT_vectors.out", "w") as outfp:
        outfp.write("#z_array speczNz photzNz\n")
        for i, z_val in enumerate(newgrid):
            outfp.write("%f %g %g\n" % (z_val, specznz[i], photznz[i]))
    mark = _lap(mark)

    print("calculating KS stat...")
    ks_stat, ks_pval = nzobj.NZKS()
    print("ks_stat: %g\nks_pval: %g\n" % (ks_stat, ks_pval))
    mark = _lap(mark)

    cvm_stat, cvm_pval = nzobj.NZCVM()
    # Separator is a newline, matching the KS report above.
    print("cvm_stat: %g\ncvm_pval: %g\n" % (cvm_stat, cvm_pval))
    mark = _lap(mark)

    # AD statistic over the range spanned by the spec-z sample.
    zmin = min(szs)
    zmax = max(szs)
    delv = (zmax - zmin) / 200.

    ad_stat, ad_pval = nzobj.NZAD(vmin=zmin, vmax=zmax, delv=delv)
    print("ad_stat: %g\nad_pval: %g\n" % (ad_stat, ad_pval))
    mark = _lap(mark)

    # AD statistic over the fixed range 0-2.
    ad_statx, ad_pvalx = nzobj.NZAD(vmin=0.0, vmax=2.0, delv=0.01)
    print("ad_stat full range: %g\nad_pval: %g\n" % (ad_statx, ad_pvalx))
    _lap(mark)

    # all stats
    with open("NZ_STATS_KSCVMAD.out", "w") as outfp:
        outfp.write("KSval: %.6g\n" % (ks_stat))
        outfp.write("KSpval: %.6g\n" % (ks_pval))

        outfp.write("CvMval: %.6g\n" % (cvm_stat))
        outfp.write("Cvmpval: %.6g\n" % (cvm_pval))

        outfp.write("ADval for vmin/vmax=%.3f %.3f: %.6g\n" %
                    (zmin, zmax, ad_stat))
        outfp.write("ADpval: %.6g\n" % (ad_pval))

        outfp.write("ADval for vmin/vmax=0.0/2.0: %.6g\n" % (ad_statx))
        outfp.write("ADpval: %.6g\n" % (ad_pvalx))

    print("finished\n")