Example #1
def main(sim_name, z_start, z_stop, merger_parent, catalog_parent, superslab_start, resume=False, plot=False, complete=False):
    '''
    Main function.
    The algorithm: for each merger tree epoch, for 
    each superslab, for each light cone origin,
    compute the intersection of the light cone with
    each halo, using the interpolated position
    to the previous merger epoch (and possibly a 
    velocity correction).  If the intersection is
    between the current and previous merger epochs, 
    then record the closer one as that halo's
    epoch and mark its progenitors as ineligible.
    Will need one padding superslab in the previous
    merger epoch.  Can process in a rolling fashion.
    '''
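    # Illustrative sketch of the per-halo test (names here are not the actual variables
    # used below): if the comoving distance chi_star at which the halo's interpolated
    # trajectory crosses the light cone lies between chi_this and chi_prev, the halo is
    # recorded at whichever of the two epochs chi_star is closer to; solve_crossing()
    # further down performs the actual solve.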
    
    # turn directories into Paths
    merger_parent = Path(merger_parent)
    catalog_parent = Path(catalog_parent)
    merger_dir = merger_parent / sim_name
    header = get_one_header(merger_dir)
    
    # simulation parameters
    Lbox = header['BoxSize']
    # location of the LC origins in Mpc/h
    origins = np.array(header['LightConeOrigins']).reshape(-1,3)

    
    # just for testing with highbase. remove!
    if 'highbase' in sim_name:
        origins /= 2.
    
    
    # directory where we save the final outputs
    cat_lc_dir = catalog_parent / "halo_light_cones" / sim_name  
    os.makedirs(cat_lc_dir, exist_ok=True)

    # directory where we save the current state if we want to resume
    os.makedirs(cat_lc_dir / "tmp", exist_ok=True)
    with open(cat_lc_dir / "tmp" / "build.log", "a") as f:
        f.writelines(["# Starting light cone catalog construction in simulation %s \n"%sim_name])
    
    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done (or make sure presaved can be matched with simulation)
    if not os.path.exists(Path("data_headers") / sim_name / "coord_dist.npy") or not os.path.exists(Path("data_headers") / sim_name / "redshifts.npy") or not os.path.exists(Path("data_headers") / sim_name / "eta_drift.npy"):
        zs_all, steps_all, chis_all, etad_all = get_lc_info(Path("all_headers") / sim_name)
        os.makedirs(Path("data_headers") / sim_name, exist_ok=True)
        np.save(Path("data_headers") / sim_name / "redshifts.npy", zs_all)
        np.save(Path("data_headers") / sim_name / "steps.npy", steps_all)
        np.save(Path("data_headers") / sim_name / "coord_dist.npy", chis_all)
        np.save(Path("data_headers") / sim_name / "eta_drift.npy", etad_all)
    zs_all = np.load(Path("data_headers") / sim_name / "redshifts.npy")
    chis_all = np.load(Path("data_headers") / sim_name / "coord_dist.npy")
    zs_all[-1] = float("%.1f" % zs_all[-1])  # LHG: I guess this is trying to match up to some filename or something?

    # get functions relating chi and z
    #chi_of_z = interp1d(np.insert(zs_all, 0, 0.), np.insert(chis_all, 0, 0.))
    #z_of_chi = interp1d(np.insert(chis_all, 0, 0.), np.insert(zs_all, 0, 0.))
    chi_of_z = interp1d(zs_all,chis_all)
    z_of_chi = interp1d(chis_all, zs_all)
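    # e.g. chi_of_z(0.5) returns the comoving distance (in Mpc/h) to z = 0.5, and
    # z_of_chi is its inverse; both are interpolations over the light cone header tables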

    
    # if merger tree redshift information has been saved, load it (if not, save it)
    if not os.path.exists(Path("data_mt") / sim_name / "zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        os.makedirs(Path("data_mt") / sim_name, exist_ok=True)
        np.save(Path("data_mt") / sim_name / "zs_mt.npy", zs_mt)
    zs_mt = np.load(Path("data_mt") / sim_name / "zs_mt.npy")

    # number of superslabs
    n_superslabs = len(list(merger_dir.glob("associations_z%4.3f.*.asdf"%zs_mt[0])))
    print("number of superslabs = ",n_superslabs)

    # starting and finishing redshift indices
    ind_start = np.argmin(np.abs(zs_mt - z_start))
    ind_stop = np.argmin(np.abs(zs_mt - z_stop))

    # initialize difference between the comoving distances of the previous two catalogs
    delta_chi_old = 0.
    
    if resume:
        # if user wants to resume from previous state, create padded array for marking whether superslab has been loaded
        resume_flags = np.ones((n_superslabs, origins.shape[0]), dtype=bool)
        
        # previous redshift, distance between shells
        infile = InputFile(cat_lc_dir / "tmp" / "build.log")
        z_this_tmp = infile.z_prev
        delta_chi_old = infile.delta_chi
        superslab = infile.super_slab

        assert (np.abs(zs_mt[ind_start] - z_this_tmp) < 1.0e-6), "Your recorded state is not for the currently requested redshift, can't resume from old. Last recorded state is z = %.3f"%z_this_tmp
        assert (np.abs((superslab_start-1)%n_superslabs - superslab) < 1.0e-6), "Your recorded state is not for the currently requested superslab, can't resume from old. Last recorded state is superslab = %d"%superslab
        with open(cat_lc_dir / "tmp" / "build.log", "a") as f:
            f.writelines(["# Resuming from redshift z = %4.3f \n"%z_this_tmp])
    else:
        # delete the existing temporary files
        tmp_files = list((cat_lc_dir / "tmp").glob("*"))
        for i in range(len(tmp_files)):
            os.unlink(str(tmp_files[i]))
        resume_flags = np.zeros((n_superslabs, origins.shape[0]), dtype=bool)

    # fields to extract from the merger trees
    fields_mt = ['HaloIndex', 'Position', 'MainProgenitor', 'Progenitors', 'NumProgenitors']
    # lighter version 
    #fields_mt = ['HaloIndex', 'Position', 'MainProgenitor']

    # redshift of closest point on wall between original and copied box
    z1 = z_of_chi(0.5 * Lbox - origins[0][0])
    # redshift of closest point where all three boxes touch
    z2 = z_of_chi((0.5*Lbox-origins[0][0])*np.sqrt(2))
    # furthest point where all three boxes touch
    z3 = z_of_chi((0.5 * Lbox - origins[0][0]) * np.sqrt(3))
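    # z1, z2 and z3 are reference redshifts for where the light cone reaches the wall and
    # the corners shared with the periodic box copies; they are not used further below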
    
    for i in range(ind_start, ind_stop + 1):

        # this snapshot redshift and the previous
        z_this = zs_mt[i]
        z_prev = zs_mt[i + 1]
        #z_pprev = zs_mt[i + 2] # not currently used
        print("redshift of this and the previous snapshot = ", z_this, z_prev)

        # TODO: the names of the folders need to be standardized
        zname_this = min(header['L1OutputRedshifts'], key=lambda z: abs(z - z_this))
        
        # check that you are starting at a reasonable redshift
        assert z_this >= np.min(zs_all), "You need to set starting redshift to the smallest value of the merger tree"
            
        # coordinate distance of the light cone at this redshift and the previous
        chi_this = chi_of_z(z_this)
        chi_prev = chi_of_z(z_prev)
        #chi_pprev = chi_of_z(z_pprev) # not currently used
        delta_chi = chi_prev - chi_this
        #delta_chi_new = chi_pprev - chi_prev # not currently used
        print("comoving distance between this and previous snapshot = ", delta_chi)
        
        # read merger trees file names at this and previous snapshot from minified version 
        fns_this = merger_dir.glob(f'associations_z{z_this:4.3f}.*.asdf.minified')
        fns_prev = merger_dir.glob(f'associations_z{z_prev:4.3f}.*.asdf.minified')
        fns_this = list(fns_this)
        fns_prev = list(fns_prev)
        minified = True

        # if minified files are not available, load the regular files
        if len(fns_this) == 0 or len(fns_prev) == 0:
            fns_this = merger_dir.glob(f'associations_z{z_this:4.3f}.*.asdf')
            fns_prev = merger_dir.glob(f'associations_z{z_prev:4.3f}.*.asdf')
            fns_this = list(fns_this)
            fns_prev = list(fns_prev)
            minified = False
            
        # turn file names into strings
        for counter in range(len(fns_this)):
            fns_this[counter] = str(fns_this[counter])
            fns_prev[counter] = str(fns_prev[counter])
            
        # number of merger tree files
        print("number of files = ", len(fns_this), len(fns_prev))
        assert n_superslabs == len(fns_this) and n_superslabs == len(fns_prev), "Incomplete merger tree files"
        # reorder file names by super slab number
        fns_this = reorder_by_slab(fns_this, minified)
        fns_prev = reorder_by_slab(fns_prev, minified)

        # get number of halos in each slab and number of slabs
        N_halos_slabs_this, slabs_this = get_halos_per_slab(fns_this, minified)
        N_halos_slabs_prev, slabs_prev = get_halos_per_slab(fns_prev, minified)
        
        # We're going to be loading slabs in a rolling fashion:
        # reading the "high" slab at the leading edge, discarding the trailing "low" slab
        # and moving the mid to low. But first we need to read all three to prime the queue
        mt_prev = {}  # indexed by slab num
        mt_prev[(superslab_start-1)%n_superslabs] = get_mt_info(fns_prev[(superslab_start-1)%n_superslabs], fields=fields_mt, minified=minified)
        mt_prev[superslab_start] = get_mt_info(fns_prev[superslab_start], fields=fields_mt, minified=minified)
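        # e.g. with superslab_start = 0 and n_superslabs = 3 this primes the queue with
        # previous-epoch superslabs {2, 0}; superslab 1 is then read inside the k loop below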

        weirdness = 0
        # loop over each superslab
        for k in range(superslab_start,n_superslabs):
            # superslab indices just below and above the current one (with periodic wrap)
            klow = (k-1)%n_superslabs
            khigh = (k+1)%n_superslabs
            
            # slide down by one
            if (klow-1)%n_superslabs in mt_prev:
                del mt_prev[(klow-1)%n_superslabs]
            mt_prev[khigh] = get_mt_info(fns_prev[khigh], fields_mt, minified)
            
            # superslab indices loaded at this redshift and at the previous one
            inds_fn_this = [k]
            inds_fn_prev = np.array([klow,k,khigh],dtype=int)
            print("superslabs loaded in this and previous redshifts = ",inds_fn_this, inds_fn_prev)
            
            # get merger tree data for this snapshot and for the previous one
            mt_data_this = get_mt_info(fns_this[k], fields_mt, minified)
            
            # number of halos in this step and previous step; this depends on the number of files requested
            N_halos_this = np.sum(N_halos_slabs_this[inds_fn_this])
            N_halos_prev = np.sum(N_halos_slabs_prev[inds_fn_prev])
            print("N_halos_this = ", N_halos_this)
            print("N_halos_prev = ", N_halos_prev)

            # organize data into astropy tables
            Merger_this = mt_data_this['merger']
            cols = {col:np.empty(N_halos_prev, dtype=(Merger_this[col].dtype, Merger_this[col].shape[1] if 'Position' in col else 1)) for col in Merger_this.keys()}
            Merger_prev = Table(cols, copy=False)
            offset = 0
            for key in mt_prev.keys():
                size_superslab = len(mt_prev[key]['merger']['HaloIndex'])
                Merger_prev[offset:offset+size_superslab] = mt_prev[key]['merger'][:]
                offset += size_superslab
                
            # mask where no merger tree info is available (because we don't need to solve for eta star for those)
            noinfo_this = Merger_this['MainProgenitor'] <= 0
            info_this = Merger_this['MainProgenitor'] > 0
            
            # print percentage where no information is available or halo not eligible
            print("percentage no info = ", np.sum(noinfo_this) / len(noinfo_this) * 100.0)

            # no info is denoted by 0 or -999 (or regular if ineligible), but -999 messes with unpacking, so we set it to 0
            Merger_this['MainProgenitor'][noinfo_this] = 0

            # rework the main progenitor and halo indices to return in proper order
            Merger_this['HaloIndex'] = correct_inds(
                Merger_this['HaloIndex'],
                N_halos_slabs_this,
                slabs_this,
                inds_fn_this,
            )
            Merger_this['MainProgenitor'] = correct_inds(
                Merger_this['MainProgenitor'],
                N_halos_slabs_prev,
                slabs_prev,
                inds_fn_prev,
            )
            Merger_prev['HaloIndex'] = correct_inds(
                Merger_prev['HaloIndex'],
                N_halos_slabs_prev,
                slabs_prev,
                inds_fn_prev,
            )
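            # (correct_inds is assumed to convert the packed on-disk indices into row
            #  numbers within the concatenation of the currently loaded superslabs)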
            
            # loop over all origins
            for o in range(len(origins)):
                
                # location of the observer
                origin = origins[o]
                
                # comoving distance to observer
                Merger_this['ComovingDistance'][:] = dist(Merger_this['Position'], origin)
                Merger_prev['ComovingDistance'][:] = dist(Merger_prev['Position'], origin)
                
                # merger tree data of main progenitor halos corresponding to the halos in current snapshot
                Merger_prev_main_this = Merger_prev[Merger_this['MainProgenitor']].copy()

                # if eligible, can be selected for light cone redshift catalog
                if (i != ind_start) or resume_flags[k, o]:
                    # dealing with the fact that these files may not exist for all origins and all superslabs
                    if os.path.exists(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_this, o, k))):
                        eligibility_this = np.load(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_this, o, k)))
                        eligibility_extrap_this = np.load(cat_lc_dir / "tmp" / ("eligibility_extrap_prev_z%4.3f_lc%d.%02d.npy"%(z_this, o, k)))
                    else:
                        eligibility_this = np.ones(N_halos_this, dtype=bool)
                        eligibility_extrap_this = np.ones(N_halos_this, dtype=bool)
                else:
                    eligibility_this = np.ones(N_halos_this, dtype=bool)
                    eligibility_extrap_this = np.ones(N_halos_this, dtype=bool)
                
                # for a newly opened redshift, everyone is eligible to be part of the light cone catalog
                eligibility_prev = np.ones(N_halos_prev, dtype=bool)
                eligibility_extrap_prev = np.ones(N_halos_prev, dtype=bool)

                # only halos without merger tree info are allowed to use the extrapolated quantities
                # masks for eligible halos for this light cone origin, with and without merger tree information
                mask_noinfo_this = noinfo_this & eligibility_this & eligibility_extrap_this
                mask_info_this = info_this & eligibility_this
                
                # halos that have merger tree information
                Merger_this_info = Merger_this[mask_info_this].copy()
                Merger_prev_main_this_info = Merger_prev_main_this[mask_info_this]
                
                # halos that don't have merger tree information
                Merger_this_noinfo = Merger_this[mask_noinfo_this].copy()
                
                # if interpolating to z = 0.1 (kinda ugly way to do this)
                if complete and np.abs(z_this - 0.1) < 1.e-3:
                    print(f"extending {z_this:4.3f} all the way to z = 0")
                    chi_low = 0.
                else:
                    chi_low = chi_this

                # select objects that are crossing the light cones
                #chs = np.array([chi_low, chi_prev], dtype=np.float32) # og
                chs = np.array([chi_low - delta_chi_old / 2.0, chi_prev], dtype=np.float32) # TESTING
                #chs = np.array([chi_low - delta_chi_old / 2.0, chi_prev + delta_chi_new / 2.0], dtype=np.float32) # TESTING weirder idea
                cond_1 = ((Merger_this_info['ComovingDistance'] > chs[0]) & (Merger_this_info['ComovingDistance'] <= chs[1]))
                cond_2 = ((Merger_prev_main_this_info['ComovingDistance'] > chs[0]) & (Merger_prev_main_this_info['ComovingDistance'] <= chs[1]))                
                mask_lc_this_info = cond_1 | cond_2
                del cond_1, cond_2

                # for halos that have no merger tree information, we simply take their current position
                # og
                cond_1 = (Merger_this_noinfo['ComovingDistance'] > chi_low - delta_chi_old / 2.0)
                cond_2 = (Merger_this_noinfo['ComovingDistance'] <= chi_low + delta_chi / 2.0)
                
                # TESTING
                #cond_1 = (Merger_this_noinfo['ComovingDistance'] > chi_low)
                #cond_2 = (Merger_this_noinfo['ComovingDistance'] <= chi_low + delta_chi)
                
                mask_lc_this_noinfo = (cond_1 & cond_2)
                del cond_1, cond_2

                # spare the computer the effort and avert empty array errors
                # TODO: perhaps revise, as sometimes we might have no halos in
                # noinfo but some in info and vice versa
                if np.sum(mask_lc_this_info) == 0 or np.sum(mask_lc_this_noinfo) == 0: continue

                # percentage of objects that are part of this or previous snapshot
                print(
                    "percentage of halos in light cone %d with and without progenitor info = "%o,
                    np.sum(mask_lc_this_info) / len(mask_lc_this_info) * 100.0,
                    np.sum(mask_lc_this_noinfo) / len(mask_lc_this_noinfo) * 100.0,
                )

                # select halos with mt info that have had a light cone crossing
                Merger_this_info_lc = Merger_this_info[mask_lc_this_info]
                Merger_prev_main_this_info_lc = Merger_prev_main_this_info[mask_lc_this_info]
                
                if plot:
                    
                    x_min = -Lbox/2.+k*(Lbox/n_superslabs)
                    x_max = x_min+(Lbox/n_superslabs)

                    x = Merger_this_info_lc['Position'][:,0]
                    choice = (x > x_min) & (x < x_max)
                    
                    y = Merger_this_info_lc['Position'][choice,1]
                    z = Merger_this_info_lc['Position'][choice,2]
                    
                    plt.figure(1)
                    plt.scatter(y, z, color='dodgerblue', s=0.1, label='current objects')

                    plt.legend()
                    plt.axis('equal')
                    plt.savefig('this_%d_%d_%d.png'%(i, k, o))
                    plt.close()
                    
                    x = Merger_prev_main_this_info_lc['Position'][:,0]
                    
                    choice = (x > x_min) & (x < x_max)

                    y = Merger_prev_main_this_info_lc['Position'][choice,1]
                    z = Merger_prev_main_this_info_lc['Position'][choice,2]
                    
                    plt.figure(2)
                    plt.scatter(y, z, color='orangered', s=0.1, label='main progenitor')

                    plt.legend()
                    plt.axis('equal')
                    plt.savefig('prev_%d_%d_%d.png'%(i, k, o))
                    plt.close()
                    
                # select halos without mt info that have had a light cone crossing
                Merger_this_noinfo_lc = Merger_this_noinfo[mask_lc_this_noinfo]
                
                # add columns for new interpolated position, velocity and comoving distance
                Merger_this_info_lc.add_column('InterpolatedPosition',copy=False)
                Merger_this_info_lc.add_column('InterpolatedVelocity',copy=False)
                Merger_this_info_lc.add_column('InterpolatedComoving',copy=False)
                
                # get chi star where lc crosses halo trajectory; bool is False where closer to previous
                (
                    Merger_this_info_lc['InterpolatedComoving'],
                    Merger_this_info_lc['InterpolatedPosition'],
                    Merger_this_info_lc['InterpolatedVelocity'],
                    bool_star_this_info_lc,
                ) = solve_crossing(
                    Merger_prev_main_this_info_lc['ComovingDistance'],
                    Merger_this_info_lc['ComovingDistance'],
                    Merger_prev_main_this_info_lc['Position'],
                    Merger_this_info_lc['Position'],
                    chi_prev,
                    chi_this,
                    Lbox,
                    origin,
                    chs,
                    complete=(complete and np.abs(z_this - 0.1) < 1.e-3),
                )
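                # bool_star_this_info_lc is True where the crossing is closer to this
                # epoch (kept in this catalog) and False where it is closer to the
                # previous epoch (postponed to Merger_next further below)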

                
                # number of objects in this light cone
                N_this_star_lc = np.sum(bool_star_this_info_lc)
                N_this_noinfo_lc = np.sum(mask_lc_this_noinfo)

                if i != ind_start or resume_flags[k, o]:
                    # check if we have information about this light cone origin, superslab and epoch
                    if os.path.exists(cat_lc_dir / "tmp" / ("Merger_next_z%4.3f_lc%d.%02d.asdf"%(z_this, o, k))):
                        
                        # load leftover halos from previously loaded redshift
                        with asdf.open(cat_lc_dir / "tmp" / ("Merger_next_z%4.3f_lc%d.%02d.asdf"%(z_this, o, k)), lazy_load=True, copy_arrays=True) as f:
                            Merger_next = f['data']

                        # if you are a halo that appears here, we are gonna ignore you
                        Merger_next = Table(Merger_next)
                        N_next_lc = len(Merger_next['HaloIndex'])
                        
                        # tmp1: to-append and extrapolated from before; tmp2: to-append and interpolated now; get rid of these; TODO: can be done less expensively
                        tmp1 = np.in1d(Merger_next['HaloIndex'][:], pack_inds(Merger_this['HaloIndex'][~eligibility_extrap_this], k))
                        tmp2 = np.in1d(Merger_next['HaloIndex'][:], pack_inds(Merger_this_info_lc['HaloIndex'][:], k))
                        tmp3 = ~(tmp1 & tmp2)
                        
                        # if we found you in the interpolated halos in this redshift, you can't be allowed to be appended as part of Merger_next
                        Merger_next = Merger_next[tmp3]
                        del tmp1, tmp2, tmp3
                        
                        # adding contributions from the previously loaded redshift
                        N_next_lc = len(Merger_next['HaloIndex'])
                        
                    else:
                        N_next_lc = 0
                else:
                    N_next_lc = 0

                # total number of halos belonging to this light cone superslab and origin
                N_lc = N_this_star_lc + N_this_noinfo_lc + N_next_lc
                print("in this snapshot: interpolated, no info, next, total = ", N_this_star_lc * 100.0 / N_lc, N_this_noinfo_lc * 100.0 / N_lc, N_next_lc * 100.0 / N_lc, N_lc)
                
                # save those arrays
                Merger_lc = Table(
                    {'HaloIndex':np.zeros(N_lc, dtype=Merger_this_info_lc['HaloIndex'].dtype),
                     'InterpolatedVelocity': np.zeros(N_lc, dtype=(np.float32,3)),
                     'InterpolatedPosition': np.zeros(N_lc, dtype=(np.float32,3)),
                     'InterpolatedComoving': np.zeros(N_lc, dtype=np.float32)
                    }
                )

                # record interpolated position and velocity for those with info belonging to current redshift
                Merger_lc['InterpolatedPosition'][:N_this_star_lc] = Merger_this_info_lc['InterpolatedPosition'][bool_star_this_info_lc]
                Merger_lc['InterpolatedVelocity'][:N_this_star_lc] = Merger_this_info_lc['InterpolatedVelocity'][bool_star_this_info_lc]
                Merger_lc['InterpolatedComoving'][:N_this_star_lc] = Merger_this_info_lc['InterpolatedComoving'][bool_star_this_info_lc]
                Merger_lc['HaloIndex'][:N_this_star_lc] = Merger_this_info_lc['HaloIndex'][bool_star_this_info_lc]

                # record interpolated position and velocity of the halos in the light cone without progenitor information
                Merger_lc['InterpolatedPosition'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = Merger_this_noinfo_lc['Position']
                Merger_lc['InterpolatedVelocity'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = np.zeros_like(Merger_this_noinfo_lc['Position'])
                Merger_lc['InterpolatedComoving'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = Merger_this_noinfo_lc['ComovingDistance'] # assign comoving distance based on position; used to be np.ones(Merger_this_noinfo_lc['Position'].shape[0])*chi_this
                Merger_lc['HaloIndex'][N_this_star_lc:N_this_star_lc+N_this_noinfo_lc] = Merger_this_noinfo_lc['HaloIndex']
                del Merger_this_noinfo_lc

                # pack halo indices for all halos but those in Merger_next
                Merger_lc['HaloIndex'][:(N_this_star_lc + N_this_noinfo_lc)] = pack_inds(Merger_lc['HaloIndex'][:(N_this_star_lc + N_this_noinfo_lc)], k)
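                # (pack_inds is assumed to fold the superslab number k back into the halo
                #  index so that the halo can later be traced to its original file)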
                
                # record information from previously loaded redshift that was postponed
                if i != ind_start or resume_flags[k, o]:
                    if N_next_lc != 0:
                        Merger_lc['InterpolatedPosition'][-N_next_lc:] = Merger_next['InterpolatedPosition'][:]
                        Merger_lc['InterpolatedVelocity'][-N_next_lc:] = Merger_next['InterpolatedVelocity'][:]
                        Merger_lc['InterpolatedComoving'][-N_next_lc:] = Merger_next['InterpolatedComoving'][:]
                        Merger_lc['HaloIndex'][-N_next_lc:] = Merger_next['HaloIndex'][:]
                        del Merger_next
                    resume_flags[k, o] = False
                
                # offset position to make light cone continuous
                Merger_lc['InterpolatedPosition'] = offset_pos(Merger_lc['InterpolatedPosition'], ind_origin = o, all_origins=origins)

                # create directory for this redshift
                os.makedirs(cat_lc_dir / ("z%.3f"%zname_this), exist_ok=True)

                '''
                _, inds = np.unique(Merger_lc['HaloIndex'], return_index=True)
                print("UNIQUE overall = ", len(inds)*100./N_lc)

                _, inds = np.unique(Merger_lc['HaloIndex'][:-N_next_lc], return_index=True)
                print("UNIQUE without next = ", len(inds)*100./len(Merger_lc['HaloIndex'][:-N_next_lc]))

                inds1 = np.arange(N_this_star_lc, dtype=int)
                inds2 = np.arange(N_this_star_lc+N_this_noinfo_lc, N_lc, dtype=int)
                inds3 = np.hstack((inds1, inds2))
                _, inds = np.unique(Merger_lc['HaloIndex'][inds3], return_index=True)
                print("UNIQUE info and next = ", len(inds)*100./len(Merger_lc['HaloIndex'][inds3]))

                inds3 = np.arange(N_this_star_lc, N_lc, dtype=int)
                _, inds = np.unique(Merger_lc['HaloIndex'][inds3], return_index=True)
                print("UNIQUE noinfo and next = ", len(inds)*100./len(Merger_lc['HaloIndex'][inds3]))
                if weirdness == 1:
                    quit()
                weirdness += 1
                '''

                
                # write table with interpolated information
                save_asdf(Merger_lc, ("Merger_lc%d.%02d"%(o,k)), header, cat_lc_dir / ("z%.3f"%zname_this))

                # mask of the extrapolated halos
                mask_extrap = (Merger_this_info_lc['InterpolatedComoving'] > chi_prev) | (Merger_this_info_lc['InterpolatedComoving'] < chi_this)
                print("percentage extrapolated = ", np.sum(mask_extrap)*100./len(mask_extrap))
                
                # TODO: Need to make sure no bugs with eligibility
                # version 1: only the main progenitor is marked ineligible
                # if halo belongs to this redshift catalog or the previous redshift catalog
                eligibility_prev[Merger_prev_main_this_info_lc['HaloIndex'][~mask_extrap]] = False
                eligibility_extrap_prev[Merger_prev_main_this_info_lc['HaloIndex'][mask_extrap]] = False
                print("number eligible = ", np.sum(eligibility_prev), np.sum(eligibility_extrap_prev))
                
                # version 2: all progenitors of halos belonging to this redshift catalog are marked ineligible 
                # run version 1 AND 2 to mark ineligible Merger_next objects to avoid multiple entries
                # Note that some progenitor indices are zeros
                # For best result perhaps combine Progs with MainProgs 
                if "Progenitors" in fields_mt:
                    nums = Merger_this_info_lc['NumProgenitors'][bool_star_this_info_lc]
                    starts = Merger_this_info_lc['StartProgenitors'][bool_star_this_info_lc]
                    # for testing purposes (remove in final version)
                    main_progs = Merger_this_info_lc['MainProgenitor'][bool_star_this_info_lc]
                    progs = mt_data_this['progenitors']['Progenitors']
                    halo_ind_prev = Merger_prev['HaloIndex']

                    N_halos_load = np.array([N_halos_slabs_prev[i] for i in inds_fn_prev])
                    slabs_prev_load = np.array(slabs_prev[inds_fn_prev], dtype=np.int64)
                    offsets = np.zeros(len(inds_fn_prev), dtype=np.int64)
                    offsets[1:] = np.cumsum(N_halos_load)[:-1]
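                    # e.g. if the three loaded superslabs hold [100, 80, 120] halos, offsets
                    # becomes [0, 100, 180], i.e. the starting row of each superslab in Merger_prev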

                    # mark ineligible the progenitors of the halos interpolated in this catalog
                    eligibility_prev = mark_ineligible(nums, starts, main_progs, progs, halo_ind_prev, eligibility_prev, offsets, slabs_prev_load)
                    
                print("number eligible after progenitors removal = ", np.sum(eligibility_prev), np.sum(eligibility_extrap_prev))

                # information to keep for next redshift considered
                N_next = np.sum(~bool_star_this_info_lc)
                Merger_next = Table(
                    {'HaloIndex': np.zeros(N_next, dtype=Merger_lc['HaloIndex'].dtype),
                     'InterpolatedVelocity': np.zeros(N_next, dtype=(np.float32,3)),
                     'InterpolatedPosition': np.zeros(N_next, dtype=(np.float32,3)),
                     'InterpolatedComoving': np.zeros(N_next, dtype=np.float32)
                    }
                )
                Merger_next['HaloIndex'][:] = Merger_prev_main_this_info_lc['HaloIndex'][~bool_star_this_info_lc]
                Merger_next['InterpolatedVelocity'][:] = Merger_this_info_lc['InterpolatedVelocity'][~bool_star_this_info_lc]
                Merger_next['InterpolatedPosition'][:] = Merger_this_info_lc['InterpolatedPosition'][~bool_star_this_info_lc]
                Merger_next['InterpolatedComoving'][:] = Merger_this_info_lc['InterpolatedComoving'][~bool_star_this_info_lc]
                del Merger_this_info_lc, Merger_prev_main_this_info_lc

                if plot:

                    # select the halos in the light cones
                    pos_choice = Merger_lc['InterpolatedPosition']

                    # selecting thin slab
                    pos_x_min = -Lbox/2.+k*(Lbox/n_superslabs)
                    pos_x_max = pos_x_min + (Lbox/n_superslabs)

                    ijk = 0
                    choice = (pos_choice[:, ijk] >= pos_x_min) & (pos_choice[:, ijk] < pos_x_max)

                    circle_this = plt.Circle(
                        (origins[0][1], origins[0][2]), radius=chi_this, color="g", fill=False
                    )
                    circle_prev = plt.Circle(
                        (origins[0][1], origins[0][2]), radius=chi_prev, color="r", fill=False
                    )

                    # clear things for fresh plot
                    ax = plt.gca()
                    ax.cla()

                    # plot particles
                    ax.scatter(pos_choice[choice, 1], pos_choice[choice, 2], s=0.1, alpha=1., color="dodgerblue")

                    # circles for in and prev
                    ax.add_artist(circle_this)
                    ax.add_artist(circle_prev)
                    plt.xlim([-Lbox/2., Lbox*1.5])
                    plt.ylim([-Lbox/2., Lbox*1.5])
                    plt.axis("equal")
                    plt.savefig('interp_%d_%d_%d.png'%(i, k, o))
                    #plt.show()
                    plt.close()
                    
                gc.collect()
                
                
                # pack halo indices for the halos in Merger_next
                offset = 0
                for idx in inds_fn_prev:
                    print("k, idx = ",k,idx)
                    choice_idx = (offset <= Merger_next['HaloIndex'][:]) & (Merger_next['HaloIndex'][:] < offset+N_halos_slabs_prev[idx])
                    Merger_next['HaloIndex'][choice_idx] = pack_inds(Merger_next['HaloIndex'][choice_idx]-offset, idx)
                    offset += N_halos_slabs_prev[idx]
                
                # split the eligibility array over three files for the three superslabs it's made up of
                offset = 0
                for idx in inds_fn_prev:
                    eligibility_prev_idx = eligibility_prev[offset:offset+N_halos_slabs_prev[idx]]
                    eligibility_extrap_prev_idx = eligibility_extrap_prev[offset:offset+N_halos_slabs_prev[idx]]
                    # combine current information with previously existing
                    if os.path.exists(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx))):
                        eligibility_prev_old = np.load(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)))
                        eligibility_prev_idx = eligibility_prev_old & eligibility_prev_idx
                        eligibility_extrap_prev_old = np.load(cat_lc_dir / "tmp" / ("eligibility_extrap_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)))
                        eligibility_extrap_prev_idx = eligibility_extrap_prev_old & eligibility_extrap_prev_idx
                        print("Appending to existing eligibility file for %4.3f, %d, %02d!"%(z_prev, o, idx))
                    else:
                        print("First time seeing eligibility file for %4.3f, %d, %02d!"%(z_prev, o, idx))
                    np.save(cat_lc_dir / "tmp" / ("eligibility_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)), eligibility_prev_idx)
                    np.save(cat_lc_dir / "tmp" / ("eligibility_extrap_prev_z%4.3f_lc%d.%02d.npy"%(z_prev, o, idx)), eligibility_extrap_prev_idx)
                    offset += N_halos_slabs_prev[idx]

                # write as table the information about halos that are part of next loaded redshift
                save_asdf(Merger_next, ("Merger_next_z%4.3f_lc%d.%02d"%(z_prev, o, k)), header, cat_lc_dir / "tmp")

                # save redshift of catalog that is next to load and difference in comoving between this and prev
                with open(cat_lc_dir / "tmp" / "build.log", "a") as f:
                    f.writelines(["# Next iteration: \n", "z_prev = %.8f \n"%z_prev, "delta_chi = %.8f \n"%delta_chi, "light_cone = %d \n"%o, "super_slab = %d \n"%k])
                
            del Merger_this, Merger_prev

        # update values for difference in comoving distance
        delta_chi_old = delta_chi
Example #2
def main(sim_name,
         z_start,
         z_stop,
         compaso_parent,
         catalog_parent,
         merger_parent,
         save_pos=False):

    compaso_parent = Path(compaso_parent)
    catalog_parent = Path(catalog_parent)
    merger_parent = Path(merger_parent)

    # directory where the CompaSO halo catalogs are saved
    cat_dir = compaso_parent / sim_name / "halos"

    # fields to extract from the CompaSO catalogs
    fields_cat = [
        'id', 'npstartA', 'npoutA', 'N', 'x_L2com', 'v_L2com', 'sigmav3d_L2com'
    ]

    # obtain the redshifts of the CompaSO catalogs
    redshifts = glob.glob(os.path.join(cat_dir, "z*"))
    zs_cat = [extract_redshift(redshifts[i]) for i in range(len(redshifts))]

    # directory where we save the final outputs
    cat_lc_dir = catalog_parent / sim_name / "halos_light_cones"

    # directory where the merger tree files are kept
    merger_dir = merger_parent / sim_name

    # if merger tree redshift information has been saved, load it (if not, save it)
    if not os.path.exists(Path("data_mt") / sim_name / "zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        os.makedirs(Path("data_mt") / sim_name, exist_ok=True)
        np.save(Path("data_mt") / sim_name / "zs_mt.npy", zs_mt)
    zs_mt = np.load(Path("data_mt") / sim_name / "zs_mt.npy")

    # names of the merger tree file for a given redshift
    merger_fns = list(merger_dir.glob("associations_z%4.3f.*.asdf" % zs_mt[0]))

    # number of chunks
    n_chunks = len(merger_fns)
    print("number of chunks = ", n_chunks)

    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done (or make sure presaved can be matched with simulation)
    if not os.path.exists(
            Path("data_headers") / sim_name /
            "coord_dist.npy") or not os.path.exists(
                Path("data_headers") / sim_name / "redshifts.npy"):
        zs_all, steps, chis_all = get_lc_info("all_headers")
        os.makedirs(Path("data_headers") / sim_name, exist_ok=True)
        np.save(Path("data_headers") / sim_name / "redshifts.npy", zs_all)
        np.save(Path("data_headers") / sim_name / "coord_dist.npy", chis_all)
    zs_all = np.load(Path("data_headers") / sim_name / "redshifts.npy")
    chis_all = np.load(Path("data_headers") / sim_name / "coord_dist.npy")
    zs_all[-1] = float(
        "%.1f" % zs_all[-1]
    )  # LHG: I guess this is trying to match up to some filename or something?

    # get functions relating chi and z
    chi_of_z = interp1d(zs_all, chis_all)
    z_of_chi = interp1d(chis_all, zs_all)

    # initial redshift where we start building the trees
    ind_start = np.argmin(np.abs(zs_mt - z_start))
    ind_stop = np.argmin(np.abs(zs_mt - z_stop))

    # loop over each merger tree redshift
    for i in range(ind_start, ind_stop + 1):

        # starting snapshot
        z_mt = zs_mt[i]
        z_cat = zs_cat[np.argmin(np.abs(z_mt - zs_cat))]
        print("Redshift = %.3f %.3f" % (z_mt, z_cat))

        # names of the merger tree file for this redshift
        merger_fns = list(merger_dir.glob("associations_z%4.3f.*.asdf" % z_mt))
        for counter in range(len(merger_fns)):
            merger_fns[counter] = str(merger_fns[counter])

        # slab indices and number of halos per slab
        N_halo_slabs, slabs = get_halos_per_slab(merger_fns, minified=False)

        # names of the light cone merger tree file for this redshift
        merger_lc_fns = list(
            (cat_lc_dir / ("z%.3f" % z_mt)).glob("Merger_lc*.asdf"))
        for counter in range(len(merger_lc_fns)):
            merger_lc_fns[counter] = str(merger_lc_fns[counter])

        # slab indices, origins and number of halos per slab
        N_halo_slabs_lc, slabs_lc, origins_lc = get_halos_per_slab_origin(
            merger_lc_fns, minified=False)

        # total number of halos in this light cone redshift
        N_lc = np.sum(N_halo_slabs_lc)
        print("total number of lc halos = ", N_lc)

        Merger_lc = Table({
            'HaloIndex':
            np.zeros(N_lc, dtype=np.int64),
            'InterpolatedVelocity':
            np.zeros(N_lc, dtype=(np.float32, 3)),
            'InterpolatedPosition':
            np.zeros(N_lc, dtype=(np.float32, 3)),
            'InterpolatedComoving':
            np.zeros(N_lc, dtype=np.float32)
        })

        # initialize index for filling halo information
        start = 0
        file_no = 0

        # offset for correcting halo indices
        offset = 0

        # loop over each chunk
        for k in range(n_chunks):
            # assert chunk number is correct
            assert slabs[k] == k, "the chunks are not matching"

            # origins for which information is available
            origins_k = origins_lc[slabs_lc == k]

            # loop over each observer origin
            for o in origins_k:
                # load the light cone arrays
                with asdf.open(cat_lc_dir / ("z%.3f" % z_mt) /
                               ("Merger_lc%d.%02d.asdf" % (o, k)),
                               lazy_load=True,
                               copy_arrays=True) as f:
                    merger_lc = f['data']

                # number of halos in this file
                num = N_halo_slabs_lc[file_no]
                file_no += 1

                # the files should be congruent
                N_halo_lc = len(merger_lc['HaloIndex'])
                assert N_halo_lc == num, "file order is messed up"

                # translate information from this file to the complete array
                for key in Merger_lc.keys():
                    Merger_lc[key][start:start + num] = merger_lc[key][:]

                # add halos in this file
                start += num

            # offset all halos in given chunk
            offset += N_halo_slabs[k]

        # unpack the fields of the merger tree catalogs
        halo_ind_lc = Merger_lc['HaloIndex'][:]
        pos_interp_lc = Merger_lc['InterpolatedPosition'][:]
        vel_interp_lc = Merger_lc['InterpolatedVelocity'][:]
        chi_interp_lc = Merger_lc['InterpolatedComoving'][:]
        del Merger_lc

        # unpack halo indices
        halo_ind_lc = correct_all_inds(halo_ind_lc, N_halo_slabs, slabs,
                                       n_chunks)
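        # after correct_all_inds, halo_ind_lc is assumed to hold global row numbers into
        # the full CompaSO catalog at this redshift, so it can index cat.halos directly below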

        # catalog directory
        catdir = str(cat_dir / ("z%.3f" % z_cat))

        # load halo catalog, setting unpack to False for speed
        if save_pos:
            try:
                cat = CompaSOHaloCatalog(catdir,
                                         load_subsamples='A_halo_all',
                                         fields=fields_cat,
                                         unpack_bits=False)
                loaded_pos = True
            except:
                cat = CompaSOHaloCatalog(catdir,
                                         load_subsamples='A_halo_pid',
                                         fields=fields_cat,
                                         unpack_bits=False)
                print(
                    "Particle positions are not available for this redshift. Saving only PIDs"
                )
                loaded_pos = False
        else:
            cat = CompaSOHaloCatalog(catdir,
                                     load_subsamples='A_halo_pid',
                                     fields=fields_cat,
                                     unpack_bits=False)
            loaded_pos = False

        # halo catalog
        halo_table = cat.halos[halo_ind_lc]
        header = cat.header
        N_halos = len(cat.halos)
        print("N_halos = ", N_halos)

        # load the particle ids
        pid = cat.subsamples['pid']
        if save_pos and loaded_pos:
            pos = cat.subsamples['pos']
        del cat

        # reindex npstart and npout for the new catalogs
        npstart = halo_table['npstartA']
        npout = halo_table['npoutA']
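        # (reindex_pid / reindex_pid_pos are assumed to gather only the subsample particles
        #  belonging to the selected halos and to return new npstartA/npoutA offsets into
        #  that trimmed particle array)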
        if save_pos and loaded_pos:
            pid_new, pos_new, npstart_new, npout_new = reindex_pid_pos(
                pid, pos, npstart, npout)
            del pid, pos
        else:
            pid_new, npstart_new, npout_new = reindex_pid(pid, npstart, npout)
            del pid
        halo_table['npstartA'] = npstart_new
        halo_table['npoutA'] = npout_new
        del npstart, npout
        del npstart_new, npout_new

        # create particle array
        if save_pos and loaded_pos:
            pid_table = Table({
                'pid': np.zeros(len(pid_new), pid_new.dtype),
                'pos': np.zeros(pos_new.shape, pos_new.dtype)
            })
            pid_table['pos'] = pos_new
        else:
            pid_table = Table({'pid': np.zeros(len(pid_new), pid_new.dtype)})
        pid_table['pid'] = pid_new
        del pid_new

        # isolate halos that did not have interpolation and get the velocity from the halo info files
        not_interp = np.sum(np.abs(vel_interp_lc), axis=1) < 1.e-6
        vel_interp_lc[not_interp] = halo_table['v_L2com'][not_interp]
        print("percentage not interpolated = ",
              100. * np.sum(not_interp) / len(not_interp))

        # append new fields
        halo_table['index_halo'] = halo_ind_lc
        halo_table['pos_interp'] = pos_interp_lc
        halo_table['vel_interp'] = vel_interp_lc
        halo_table['redshift_interp'] = z_of_chi(chi_interp_lc)

        del halo_ind_lc, pos_interp_lc, vel_interp_lc, not_interp, chi_interp_lc

        # save to files
        save_asdf(halo_table, "halo_info_lc", header,
                  cat_lc_dir / ("z%4.3f" % z_mt))
        save_asdf(pid_table, "pid_lc", header, cat_lc_dir / ("z%4.3f" % z_mt))

        # delete things at the end
        del pid_table
        del halo_table

        gc.collect()
Example #3
def main(sim_name, z_lowest, z_highest, light_cone_parent, catalog_parent,
         merger_parent):
    light_cone_parent = Path(light_cone_parent)
    catalog_parent = Path(catalog_parent)
    merger_parent = Path(merger_parent)

    # directory where the merger tree files are kept
    merger_dir = merger_parent / sim_name
    header = get_one_header(merger_dir)

    # simulation parameters
    Lbox = header['BoxSize']
    PPD = header['ppd']

    # directory where we have saved the final outputs from merger trees and halo catalogs
    cat_lc_dir = catalog_parent / sim_name / "halos_light_cones"

    # directory where light cones are saved
    lc_dir = light_cone_parent / sim_name / "lightcones"

    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done (or make sure presaved can be matched with simulation)
    if not os.path.exists(
            Path("data_headers") / sim_name /
            "coord_dist.npy") or not os.path.exists(
                Path("data_headers") / sim_name /
                "redshifts.npy") or not os.path.exists(
                    Path("data_headers") / sim_name / "steps.npy"):
        zs_all, steps_all, chis_all = get_lc_info("all_headers")
        os.makedirs(Path("data_headers") / sim_name, exist_ok=True)
        np.save(Path("data_headers") / sim_name / "redshifts.npy", zs_all)
        np.save(Path("data_headers") / sim_name / "steps.npy", steps_all)
        np.save(Path("data_headers") / sim_name / "coord_dist.npy", chis_all)
    zs_all = np.load(Path("data_headers") / sim_name / "redshifts.npy")
    steps_all = np.load(Path("data_headers") / sim_name / "steps.npy")
    chis_all = np.load(Path("data_headers") / sim_name / "coord_dist.npy")
    zs_all[-1] = float("%.1f" % zs_all[-1])

    # if merger tree redshift information has been saved, load it (if not, save it)
    if not os.path.exists(Path("data_mt") / sim_name / "zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        os.makedirs(Path("data_mt") / sim_name, exist_ok=True)
        np.save(Path("data_mt") / sim_name / "zs_mt.npy", zs_mt)
    zs_mt = np.load(Path("data_mt") / sim_name / "zs_mt.npy")
    # correct for interpolation out of bounds error
    zs_mt = zs_mt[(zs_mt <= zs_all.max()) & (zs_mt >= zs_all.min())]

    # time step of furthest and closest shell in the light cone files
    step_min = np.min(steps_all)
    step_max = np.max(steps_all)

    # get functions relating chi and z
    chi_of_z = interp1d(zs_all, chis_all)
    z_of_chi = interp1d(chis_all, zs_all)

    # conformal distance of the mtree catalogs
    chis_mt = chi_of_z(zs_mt)

    # Read light cone file names
    lc_rv_fns = sorted(glob.glob(os.path.join(lc_dir, 'rv/LightCone*')))
    lc_pid_fns = sorted(glob.glob(os.path.join(lc_dir, 'pid/LightCone*')))

    # select the final and initial step for computing the convergence map
    step_start = steps_all[np.argmin(np.abs(zs_all - z_highest))]
    step_stop = steps_all[np.argmin(np.abs(zs_all - z_lowest))]
    print("step_start = ", step_start)
    print("step_stop = ", step_stop)

    # these are the time steps associated with each of the light cone files
    step_fns = np.zeros(len(lc_pid_fns), dtype=int)
    for i in range(len(lc_pid_fns)):
        step_fns[i] = extract_steps(lc_pid_fns[i])

    # initialize previously loaded mt file name
    currently_loaded_zs = []
    currently_loaded_headers = []
    currently_loaded_npouts = []
    currently_loaded_pids = []
    currently_loaded_tables = []
    for step in range(step_start, step_stop + 1):
        # our arrays are indexed by step number offset by step_min (steps run step_start, step_start+1, ... step_stop)
        j = step - step_min
        step_this = steps_all[j]
        z_this = zs_all[j]
        chi_this = chis_all[j]

        assert step_this == step, "You've messed up the counts"
        print("light cones step, redshift = ", step_this, z_this)

        # get the two redshifts it's straddling and the mean chi
        mt_fns, mt_zs, mt_chis, halo_mt_fns = get_mt_fns(
            z_this, zs_mt, chis_mt, cat_lc_dir)

        # get the mean chi
        mt_chi_mean = np.mean(mt_chis)

        # how many shells are we including on both sides, including mid point (total of 2*buffer_no + 1)
        buffer_no = 2

        # is this the redshift that's closest to the bridge between two redshifts
        mid_bool = (np.argmin(np.abs(mt_chi_mean - chis_all))
                    <= j + buffer_no) & (np.argmin(
                        np.abs(mt_chi_mean - chis_all)) >= j - buffer_no)

        # if not in between two redshifts, we just need one catalog -- the one it is closest to
        if not mid_bool:
            mt_fns = [mt_fns[np.argmin(np.abs(mt_chis - chi_this))]]
            halo_mt_fns = [halo_mt_fns[np.argmin(np.abs(mt_chis - chi_this))]]
            mt_zs = [mt_zs[np.argmin(np.abs(mt_chis - chi_this))]]

        # load this and prev
        for i in range(len(mt_fns)):
            # check if catalog already loaded
            if mt_zs[i] in currently_loaded_zs:
                print("skipped loading catalog ", mt_zs[i])
                continue

            # discard the old redshift catalog and record its data
            if len(currently_loaded_zs) >= 2:

                # save the information about that redshift
                save_asdf(currently_loaded_tables[0], "pid_rv_lc",
                          currently_loaded_headers[0],
                          cat_lc_dir / ("z%4.3f" % currently_loaded_zs[0]))
                print("saved catalog = ", currently_loaded_zs[0])

                # discard it from currently loaded
                currently_loaded_zs = currently_loaded_zs[1:]
                currently_loaded_headers = currently_loaded_headers[1:]
                currently_loaded_pids = currently_loaded_pids[1:]
                currently_loaded_npouts = currently_loaded_npouts[1:]
                currently_loaded_tables = currently_loaded_tables[1:]

            # load new merger tree catalog
            mt_pid, header = load_mt_pid(mt_fns[i], Lbox, PPD)
            halo_mt_npout = load_mt_npout(halo_mt_fns[i])

            # start the light cones table for this redshift
            lc_table_final = np.empty(len(mt_pid),dtype=[('pid',mt_pid.dtype),('pos',(np.float32,3)),\
                                                         ('vel',(np.float32,3)),('redshift',np.float32)])

            # append the newly loaded catalog
            currently_loaded_zs.append(mt_zs[i])
            currently_loaded_headers.append(header)
            currently_loaded_pids.append(mt_pid)
            currently_loaded_npouts.append(halo_mt_npout)
            currently_loaded_tables.append(lc_table_final)

        print("currently loaded redshifts = ", currently_loaded_zs)
        print("using redshifts = ", mt_zs)

        # find all light cone file names that correspond to this time step
        choice_fns = np.where(step_fns == step_this)[0]
        # number of light cones at this step
        num_lc = len(choice_fns)

        assert (num_lc <= 3) & (
            num_lc > 0
        ), "There can be at most three files in the light cones corresponding to a given step"
        # loop through those one to three light cone files
        for i_choice, choice_fn in enumerate(choice_fns):
            print("light cones file = ", lc_pid_fns[choice_fn])

            # load particles in light cone
            lc_pid, lc_rv = load_lc_pid_rv(lc_pid_fns[choice_fn],
                                           lc_rv_fns[choice_fn], Lbox, PPD)

            if 'LightCone1' in lc_pid_fns[choice_fn]:
                offset_lc = np.array([0., 0., Lbox])
            elif 'LightCone2' in lc_pid_fns[choice_fn]:
                offset_lc = np.array([0., Lbox, 0.])
            else:
                offset_lc = np.array([0., 0., 0.])

            # loop over the one or two closest catalogs
            for i in range(len(mt_fns)):
                which_mt = np.where(mt_zs[i] == currently_loaded_zs)[0]
                mt_pid = currently_loaded_pids[which_mt[0]]
                halo_mt_npout = currently_loaded_npouts[which_mt[0]]
                header = currently_loaded_headers[which_mt[0]]
                lc_table_final = currently_loaded_tables[which_mt[0]]
                mt_z = currently_loaded_zs[which_mt[0]]

                # match merger tree and light cone pids
                print("starting")

                # original version start
                t1 = time.time()
                i_sort_lc_pid = np.argsort(lc_pid)
                mt_in_lc = match(mt_pid, lc_pid, arr2_index=i_sort_lc_pid)
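                # match() is assumed to return, for each entry of mt_pid, the index of the
                # matching entry in lc_pid (or -1 if absent), using the pre-sorted order
                # passed via arr2_index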
                comm2 = mt_in_lc[mt_in_lc > -1]
                comm1 = np.arange(len(mt_pid), dtype=int)[mt_in_lc > -1]
                pid_mt_lc = mt_pid[mt_in_lc > -1]
                print("time = ", time.time() - t1)

                # select the intersected positions
                pos_mt_lc, vel_mt_lc = unpack_rvint(lc_rv[comm2], Lbox)

                # print percentage of matched pids
                print("at z = %.3f, matched = " % mt_z,
                      len(comm1) * 100. / (len(mt_pid)))
                # original version end
                '''
                # alternative Lehman implementation start
                t1 = time.time()
                comm1, nmatch, hrvint = match_halo_pids_to_lc_rvint(halo_mt_npout, mt_pid, lc_rv, lc_pid)
                print("at z = %.3f, matched = "%mt_z,len(hrvint)*100./(len(mt_pid)))
                print("time = ", time.time()-t1)
                
                pos_mt_lc, vel_mt_lc = unpack_rvint(hrvint,Lbox)
                pid_mt_lc = mt_pid[comm1]                
                # alternative Lehman implementation end
                '''

                # offset depending on which light cone we are at
                pos_mt_lc += offset_lc

                # save the pid, position, velocity and redshift
                lc_table_final['pid'][comm1] = pid_mt_lc
                lc_table_final['pos'][comm1] = pos_mt_lc
                lc_table_final['vel'][comm1] = vel_mt_lc
                lc_table_final['redshift'][comm1] = np.ones(
                    len(pid_mt_lc)) * z_this
            print("-------------------")

    # close the two that are currently open
    for i in range(len(currently_loaded_zs)):
        # save the information about that redshift
        save_asdf(currently_loaded_tables[0], "pid_rv_lc",
                  currently_loaded_headers[0],
                  cat_lc_dir / ("z%4.3f" % currently_loaded_zs[0]))
        print("saved catalog = ", currently_loaded_zs[0])

        # discard it from currently loaded
        currently_loaded_zs = currently_loaded_zs[1:]
        currently_loaded_headers = currently_loaded_headers[1:]
        currently_loaded_pids = currently_loaded_pids[1:]
        currently_loaded_tables = currently_loaded_tables[1:]
Example #4
def main(sim_name,
         z_start,
         z_stop,
         merger_parent,
         catalog_parent,
         resume=False,
         plot=False):
    '''
    Main function.
    The algorithm: for each merger tree epoch, for 
    each superslab, for each light cone origin,
    compute the intersection of the light cone with
    each halo, using the interpolated position
    to the previous merger epoch (and possibly a 
    velocity correction).  If the intersection is
    between the current and previous merger epochs, 
    then record the closer one as that halo's
    epoch and mark its progenitors as ineligible.
    Will need one padding superslab in the previous
    merger epoch.  Can process in a rolling fashion.
    '''

    merger_dir = merger_parent / sim_name
    header = get_one_header(merger_dir)

    # simulation parameters
    Lbox = header['BoxSize']
    # location of the LC origins in Mpc/h
    origins = np.array(header['LightConeOrigins']).reshape(-1, 3)

    # just for testing with highbase. remove!
    if 'highbase' in sim_name:
        origins /= 2.

    # directory where we save the final outputs
    cat_lc_dir = catalog_parent / sim_name / "halos_light_cones/"
    os.makedirs(cat_lc_dir, exist_ok=True)

    # directory where we save the current state if we want to resume
    os.makedirs(cat_lc_dir / "tmp", exist_ok=True)
    with open(cat_lc_dir / "tmp" / "tmp.log", "a") as f:
        f.writelines([
            "# Starting light cone catalog construction in simulation %s \n" %
            sim_name
        ])

    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done (or make sure presaved can be matched with simulation)
    if not os.path.exists("data_headers/coord_dist.npy") or not os.path.exists(
            "data_headers/redshifts.npy"):
        zs_all, steps, chis_all = get_lc_info("all_headers")
        np.save("data_headers/redshifts.npy", zs_all)
        np.save("data_headers/coord_dist.npy", chis_all)
    zs_all = np.load("data_headers/redshifts.npy")
    chis_all = np.load("data_headers/coord_dist.npy")
    zs_all[-1] = float(
        "%.1f" % zs_all[-1]
    )  # LHG: I guess this is trying to match up to some filename or something?

    # get functions relating chi and z
    chi_of_z = interp1d(zs_all, chis_all)
    z_of_chi = interp1d(chis_all, zs_all)

    # merger tree snapshot redshifts: load if cached, otherwise compute and save
    if not os.path.exists("data/zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        os.makedirs("data", exist_ok=True)
        np.save("data/zs_mt.npy", zs_mt)
    zs_mt = np.load("data/zs_mt.npy")

    # number of chunks
    n_chunks = len(
        list(merger_dir.glob("associations_z%4.3f.*.asdf" % zs_mt[0])))
    print("number of chunks = ", n_chunks)

    # starting and finishing redshift indices
    ind_start = np.argmin(np.abs(zs_mt - z_start))
    ind_stop = np.argmin(np.abs(zs_mt - z_stop))

    if resume:
        # if user wants to resume from previous state, create padded array for marking whether chunk has been loaded
        resume_flags = np.ones((n_chunks, origins.shape[0]), dtype=bool)

        # previous redshift, distance between shells
        infile = InputFile(cat_lc_dir / "tmp" / "tmp.log")
        z_this_tmp = infile.z_prev
        delta_chi_old = infile.delta_chi
        chunk = infile.super_slab
        assert (
            np.abs(n_chunks - 1 - chunk) < 1.0e-6
        ), "Your recorded state did not complete all chunks, can't resume from old"
        assert (
            np.abs(zs_mt[ind_start] - z_this_tmp) < 1.0e-6
        ), "Your recorded state is not for the correct redshift, can't resume from old"
        with open(cat_lc_dir / "tmp" / "tmp.log", "a") as f:
            f.writelines(
                ["# Resuming from redshift z = %4.3f \n" % z_this_tmp])
    else:
        # delete the existing temporary files
        tmp_files = list((cat_lc_dir / "tmp").glob("*"))
        for i in range(len(tmp_files)):
            os.unlink(str(tmp_files[i]))
        resume_flags = np.zeros((n_chunks, origins.shape[0]), dtype=bool)

    # fields to extract from the merger trees
    # fields_mt = ['HaloIndex','HaloMass','Position','MainProgenitor','Progenitors','NumProgenitors']
    # lighter version
    fields_mt = ['HaloIndex', 'Position', 'MainProgenitor']

    # redshift of closest point on wall between original and copied box
    z1 = z_of_chi(0.5 * Lbox - origins[0][0])
    # redshift of closest point where all three boxes touch
    # z2 = z_of_chi((0.5*Lbox-origin[0])*np.sqrt(2))
    # furthest point where all three boxes touch;
    z3 = z_of_chi((0.5 * Lbox - origins[0][0]) * np.sqrt(3))

    # initialize the difference in comoving distance between the last two shells
    delta_chi_old = 0.0

    for i in range(ind_start, ind_stop + 1):

        # this snapshot redshift and the previous
        z_this = zs_mt[i]
        z_prev = zs_mt[i + 1]
        print("redshift of this and the previous snapshot = ", z_this, z_prev)

        # coordinate distance of the light cone at this redshift and the previous
        assert z_this >= np.min(
            zs_all
        ), "You need to set starting redshift to the smallest value of the merger tree"
        chi_this = chi_of_z(z_this)
        chi_prev = chi_of_z(z_prev)
        delta_chi = chi_prev - chi_this
        print("comoving distance between this and previous snapshot = ",
              delta_chi)

        # read merger trees file names at this and previous snapshot from minified version
        # LHG: do we need to support both minified and non-minified separately? I thought all the data was in the minified format now.
        fns_this = list(
            merger_dir.glob(f'associations_z{z_this:4.3f}.*.asdf.minified'))
        fns_prev = list(
            merger_dir.glob(f'associations_z{z_prev:4.3f}.*.asdf.minified'))
        minified = True

        # if minified files not available, load the regular files
        if len(fns_this) == 0 or len(fns_prev) == 0:
            fns_this = list(
                merger_dir.glob(f'associations_z{z_this:4.3f}.*.asdf'))
            fns_prev = list(
                merger_dir.glob(f'associations_z{z_prev:4.3f}.*.asdf'))
            minified = False

        # turn file names into strings
        fns_this = [str(f) for f in fns_this]
        fns_prev = [str(f) for f in fns_prev]

        # number of merger tree files
        print("number of files = ", len(fns_this), len(fns_prev))
        assert n_chunks == len(fns_this) and n_chunks == len(
            fns_prev), "Incomplete merger tree files"

        # reorder file names by super slab number
        fns_this = reorder_by_slab(fns_this, minified)
        fns_prev = reorder_by_slab(fns_prev, minified)

        # maybe we want to support resuming from arbitrary superslab
        first_ss = 0

        # We're going to be loading slabs in a rolling fashion:
        # reading the "high" slab at the leading edge, discarding the trailing "low" slab
        # and moving the mid to low. But first we need to read all three to prime the queue
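        # Illustration: with n_chunks = 5 and first_ss = 0, mt_prev holds superslabs
        # {4, 0} before the k-loop starts, then {4, 0, 1} at k = 0, {0, 1, 2} at k = 1,
        # and so on, i.e. the current superslab plus one padding superslab on each side.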
        mt_prev = {}  # indexed by slab num
        mt_prev[(first_ss - 1) % n_chunks] = get_mt_info(
            fns_prev[(first_ss - 1) % n_chunks],
            fields=fields_mt,
            minified=minified)
        mt_prev[first_ss] = get_mt_info(fns_prev[first_ss],
                                        fields=fields_mt,
                                        minified=minified)

        # for each chunk
        for k in range(first_ss, n_chunks):
            # starting and finishing superslab chunks
            klow = (k - 1) % n_chunks
            khigh = (k + 1) % n_chunks

            # Slide down by one
            if (klow - 1) % n_chunks in mt_prev:
                del mt_prev[(klow - 1) % n_chunks]
            mt_prev[khigh] = get_mt_info(fns_prev[khigh],
                                         fields=fields_mt,
                                         minified=minified)

            print(
                f"Loaded chunk {k} in this redshift, and {tuple(mt_prev)} in previous"
            )
            # get merger tree data for this snapshot and for the previous one
            mt_data_this, halos_per_slab_this = get_mt_info(fns_this[k],
                                                            fields=fields_mt,
                                                            minified=minified)

            # ======== LHG: haven't edited below here

            # number of halos in this step and previous step; this depends on the number of files requested
            N_halos_this = np.sum(N_halos_slabs_this[inds_fn_this])
            N_halos_prev = np.sum(N_halos_slabs_prev[inds_fn_prev])
            print("N_halos_this = ", N_halos_this)
            print("N_halos_prev = ", N_halos_prev)

            # mask where no merger tree info is available (because we don't need to solve for eta star for those)
            noinfo_this = Merger_this['MainProgenitor'] <= 0
            info_this = Merger_this['MainProgenitor'] > 0

            # print percentage where no information is available or halo not eligible
            print("percentage no info = ",
                  np.sum(noinfo_this) / len(noinfo_this) * 100.0)

            # no info is denoted by 0 or -999 (or regular if ineligible), but -999 messes with unpacking, so we set it to 0
            Merger_this['MainProgenitor'][noinfo_this] = 0

            # rework the main progenitor and halo indices to return in proper order
            Merger_this['HaloIndex'] = correct_inds(
                Merger_this['HaloIndex'],
                N_halos_slabs_this,
                slabs_this,
                inds_fn_this,
            )
            Merger_this['MainProgenitor'] = correct_inds(
                Merger_this['MainProgenitor'],
                N_halos_slabs_prev,
                slabs_prev,
                inds_fn_prev,
            )
            Merger_prev['HaloIndex'] = correct_inds(
                Merger_prev['HaloIndex'],
                N_halos_slabs_prev,
                slabs_prev,
                inds_fn_prev,
            )

            # loop over all origins
            for o in range(len(origins)):

                # location of the observer
                origin = origins[o]

                # comoving distance to observer
                Merger_this['ComovingDistance'] = dist(Merger_this['Position'],
                                                       origin)
                Merger_prev['ComovingDistance'] = dist(Merger_prev['Position'],
                                                       origin)

                # merger tree data of main progenitor halos corresponding to the halos in current snapshot
                Merger_prev_main_this = Merger_prev[
                    Merger_this['MainProgenitor']].copy()

                # if eligible, can be selected for light cone redshift catalog;
                if i != ind_start or resume_flags[k, o]:
                    # dealing with the fact that these files may not exist for all origins and all chunks
                    if os.path.exists(
                            cat_lc_dir / "tmp" /
                        ("eligibility_prev_z%4.3f_lc%d.%02d.npy" %
                         (z_this, o, k))):
                        eligibility_this = np.load(
                            cat_lc_dir / "tmp" /
                            ("eligibility_prev_z%4.3f_lc%d.%02d.npy" %
                             (z_this, o, k)))
                    else:
                        eligibility_this = np.ones(N_halos_this, dtype=bool)
                else:
                    eligibility_this = np.ones(N_halos_this, dtype=bool)

                # for a newly opened redshift, everyone is eligible to be part of the light cone catalog
                eligibility_prev = np.ones(N_halos_prev, dtype=bool)

                # mask for eligible halos for light cone origin with and without information
                mask_noinfo_this = noinfo_this & eligibility_this
                mask_info_this = info_this & eligibility_this

                # halos that have merger tree information
                Merger_this_info = Merger_this[mask_info_this].copy()
                Merger_prev_main_this_info = Merger_prev_main_this[
                    mask_info_this]

                # halos that don't have merger tree information
                Merger_this_noinfo = Merger_this[mask_noinfo_this].copy()

                # select objects that are crossing the light cones
                # TODO: revise the choice for halos stranded between the two epochs: conservative ( & ) vs less conservative ( | )
                mask_lc_this_info = (
                    ((Merger_this_info['ComovingDistance'] > chi_this) &
                     (Merger_this_info['ComovingDistance'] <= chi_prev)))
                #| ((Merger_prev_main_this_info['ComovingDistance'] > chi_this) & (Merger_prev_main_this_info['ComovingDistance'] <= chi_prev))

                mask_lc_this_noinfo = ((Merger_this_noinfo['ComovingDistance']
                                        > chi_this - delta_chi_old / 2.0)
                                       &
                                       (Merger_this_noinfo['ComovingDistance']
                                        <= chi_this + delta_chi / 2.0))

                # spare the computer the effort and avert empty array errors
                # TODO: perhaps revise, as sometimes we might have no halos in
                # noinfo but some in info and vice versa
                if np.sum(mask_lc_this_info) == 0 or np.sum(
                        mask_lc_this_noinfo) == 0:
                    continue

                # percentage of objects that are part of this or previous snapshot
                print(
                    "percentage of halos in light cone %d with and without progenitor info = "
                    % o,
                    np.sum(mask_lc_this_info) / len(mask_lc_this_info) * 100.0,
                    np.sum(mask_lc_this_noinfo) / len(mask_lc_this_noinfo) *
                    100.0,
                )

                # select halos with mt info that have had a light cone crossing
                Merger_this_info_lc = Merger_this_info[mask_lc_this_info]
                Merger_prev_main_this_info_lc = Merger_prev_main_this_info[
                    mask_lc_this_info]

                if plot:
                    x_min = -Lbox / 2. + k * (Lbox / n_chunks)
                    x_max = x_min + (Lbox / n_chunks)

                    x = Merger_this_info_lc['Position'][:, 0]
                    choice = (x > x_min) & (x < x_max)

                    y = Merger_this_info_lc['Position'][choice, 1]
                    z = Merger_this_info_lc['Position'][choice, 2]

                    plt.figure(1)
                    plt.scatter(y,
                                z,
                                color='dodgerblue',
                                s=0.1,
                                label='current objects')

                    plt.legend()
                    plt.axis('equal')

                    x = Merger_prev_main_this_info_lc['Position'][:, 0]

                    choice = (x > x_min) & (x < x_max)

                    y = Merger_prev_main_this_info_lc['Position'][choice, 1]
                    z = Merger_prev_main_this_info_lc['Position'][choice, 2]

                    plt.figure(2)
                    plt.scatter(y,
                                z,
                                color='orangered',
                                s=0.1,
                                label='main progenitor')

                    plt.legend()
                    plt.axis('equal')
                    plt.show()

                # select halos without mt info that have had a light cone crossing
                Merger_this_noinfo_lc = Merger_this_noinfo[mask_lc_this_noinfo]

                # add columns for new interpolated position, velocity and comoving distance
                Merger_this_info_lc.add_column('InterpolatedPosition',
                                               copy=False)
                Merger_this_info_lc.add_column('InterpolatedVelocity',
                                               copy=False)
                Merger_this_info_lc.add_column('InterpolatedComoving',
                                               copy=False)

                # get chi star where lc crosses halo trajectory; bool is False where closer to previous
                # (a simplified sketch of this crossing solve appears after this example)
                (
                    Merger_this_info_lc['InterpolatedComoving'],
                    Merger_this_info_lc['InterpolatedPosition'],
                    Merger_this_info_lc['InterpolatedVelocity'],
                    bool_star_this_info_lc,
                ) = solve_crossing(
                    Merger_prev_main_this_info_lc['ComovingDistance'],
                    Merger_this_info_lc['ComovingDistance'],
                    Merger_prev_main_this_info_lc['Position'],
                    Merger_this_info_lc['Position'], chi_prev, chi_this, Lbox,
                    origin)

                # number of objects in this light cone
                N_this_star_lc = np.sum(bool_star_this_info_lc)
                N_this_noinfo_lc = np.sum(mask_lc_this_noinfo)

                if i != ind_start or resume_flags[k, o]:
                    # cheap way to deal with the fact that sometimes we won't have information about all light cone origins for certain chunks and epochs
                    if os.path.exists(cat_lc_dir / "tmp" /
                                      ("Merger_next_z%4.3f_lc%d.%02d.asdf" %
                                       (z_this, o, k))):
                        # load leftover halos from previously loaded redshift
                        with asdf.open(cat_lc_dir / "tmp" /
                                       ("Merger_next_z%4.3f_lc%d.%02d.asdf" %
                                        (z_this, o, k))) as f:
                            Merger_next = f['data']

                        # adding contributions from the previously loaded redshift
                        N_next_lc = len(Merger_next['HaloIndex'])
                    else:
                        N_next_lc = 0
                else:
                    N_next_lc = 0

                # total number of halos belonging to this light cone superslab and origin
                N_lc = N_this_star_lc + N_this_noinfo_lc + N_next_lc

                print(
                    "in this snapshot: interpolated, no info, next, total = ",
                    N_this_star_lc * 100.0 / N_lc,
                    N_this_noinfo_lc * 100.0 / N_lc, N_next_lc * 100.0 / N_lc,
                    N_lc)

                # save those arrays
                Merger_lc = Table({
                    'HaloIndex':
                    np.zeros(N_lc,
                             dtype=Merger_this_info_lc['HaloIndex'].dtype),
                    'InterpolatedVelocity':
                    np.zeros(N_lc, dtype=(np.float32, 3)),
                    'InterpolatedPosition':
                    np.zeros(N_lc, dtype=(np.float32, 3)),
                    'InterpolatedComoving':
                    np.zeros(N_lc, dtype=np.float32)
                })

                # record interpolated position and velocity for those with info belonging to current redshift
                Merger_lc[
                    'InterpolatedPosition'][:
                                            N_this_star_lc] = Merger_this_info_lc[
                                                'InterpolatedPosition'][
                                                    bool_star_this_info_lc]
                Merger_lc[
                    'InterpolatedVelocity'][:
                                            N_this_star_lc] = Merger_this_info_lc[
                                                'InterpolatedVelocity'][
                                                    bool_star_this_info_lc]
                Merger_lc[
                    'InterpolatedComoving'][:
                                            N_this_star_lc] = Merger_this_info_lc[
                                                'InterpolatedComoving'][
                                                    bool_star_this_info_lc]
                Merger_lc['HaloIndex'][:N_this_star_lc] = Merger_this_info_lc[
                    'HaloIndex'][bool_star_this_info_lc]

                # record interpolated position and velocity of the halos in the light cone without progenitor information
                Merger_lc['InterpolatedPosition'][
                    N_this_star_lc:N_this_star_lc +
                    N_this_noinfo_lc] = Merger_this_noinfo_lc['Position']
                Merger_lc['InterpolatedVelocity'][
                    N_this_star_lc:N_this_star_lc +
                    N_this_noinfo_lc] = np.zeros_like(
                        Merger_this_noinfo_lc['Position'])
                Merger_lc['InterpolatedComoving'][
                    N_this_star_lc:N_this_star_lc +
                    N_this_noinfo_lc] = np.ones(
                        Merger_this_noinfo_lc['Position'].shape[0]) * chi_this
                Merger_lc['HaloIndex'][
                    N_this_star_lc:N_this_star_lc +
                    N_this_noinfo_lc] = Merger_this_noinfo_lc['HaloIndex']
                del Merger_this_noinfo_lc

                # record information from previously loaded redshift that was postponed
                if i != ind_start or resume_flags[k, o]:
                    if N_next_lc != 0:
                        Merger_lc['InterpolatedPosition'][
                            -N_next_lc:] = Merger_next['InterpolatedPosition'][
                                'data'][:]
                        Merger_lc['InterpolatedVelocity'][
                            -N_next_lc:] = Merger_next['InterpolatedVelocity'][
                                'data'][:]
                        Merger_lc['InterpolatedComoving'][
                            -N_next_lc:] = Merger_next['InterpolatedComoving'][
                                'data'][:]
                        Merger_lc['HaloIndex'][-N_next_lc:] = Merger_next[
                            'HaloIndex']['data'][:]
                        del Merger_next
                    resume_flags[k, o] = False

                # offset position to make light cone continuous
                Merger_lc['InterpolatedPosition'] = offset_pos(
                    Merger_lc['InterpolatedPosition'],
                    ind_origin=o,
                    all_origins=origins)

                # create directory for this redshift
                os.makedirs(cat_lc_dir / ("z%.3f" % z_this), exist_ok=True)

                # write table with interpolated information
                save_asdf(Merger_lc, ("Merger_lc%d.%02d" % (o, k)), header,
                          cat_lc_dir / ("z%.3f" % z_this))

                # TODO: Need to make sure no bugs with eligibility, ask Lehman
                # version 1: only the main progenitor is marked ineligible
                # if halo belongs to this redshift catalog or the previous redshift catalog;
                eligibility_prev[
                    Merger_prev_main_this_info_lc['HaloIndex']] = False

                # version 2: all progenitors of halos belonging to this redshift catalog are marked ineligible
                # run version 1 AND 2 to mark ineligible Merger_next objects to avoid multiple entries
                # optimize with numba if possible (ask Lehman)
                # Note that some progenitor indices are zeros;
                # For best result perhaps combine Progs with MainProgs
                if "Progenitors" in fields_mt:
                    nums = Merger_this_info_lc['NumProgenitors'][
                        bool_star_this_info_lc]
                    starts = Merger_this_info_lc['StartProgenitors'][
                        bool_star_this_info_lc]
                    # for testing purposes (remove in final version)
                    main_progs = Merger_this_info_lc['HaloIndex'][
                        bool_star_this_info_lc]
                    # loop around halos that were marked belonging to this redshift catalog
                    for j in range(N_this_star_lc):
                        # select all progenitors
                        start = starts[j]
                        num = nums[j]
                        prog_inds = Progs_this[start:start + num]

                        # remove progenitors with no info
                        prog_inds = prog_inds[prog_inds > 0]
                        if len(prog_inds) == 0: continue

                        # correct halo indices
                        prog_inds = correct_inds(prog_inds, N_halos_slabs_prev,
                                                 slabs_prev, inds_fn_prev)
                        halo_inds = Merger_prev['HaloIndex'][prog_inds]

                        # test output; remove in final version
                        if j < 100:
                            print(halo_inds, Merger_prev[main_progs[j]])

                        # mark ineligible
                        eligibility_prev[halo_inds] = False

                # information to keep for next redshift considered
                N_next = np.sum(~bool_star_this_info_lc)
                Merger_next = Table({
                    'HaloIndex':
                    np.zeros(N_next, dtype=Merger_lc['HaloIndex'].dtype),
                    'InterpolatedVelocity':
                    np.zeros(N_next, dtype=(np.float32, 3)),
                    'InterpolatedPosition':
                    np.zeros(N_next, dtype=(np.float32, 3)),
                    'InterpolatedComoving':
                    np.zeros(N_next, dtype=np.float32)
                })
                Merger_next['HaloIndex'][:] = Merger_prev_main_this_info_lc[
                    'HaloIndex'][~bool_star_this_info_lc]
                Merger_next['InterpolatedVelocity'][:] = Merger_this_info_lc[
                    'InterpolatedVelocity'][~bool_star_this_info_lc]
                Merger_next['InterpolatedPosition'][:] = Merger_this_info_lc[
                    'InterpolatedPosition'][~bool_star_this_info_lc]
                Merger_next['InterpolatedComoving'][:] = Merger_this_info_lc[
                    'InterpolatedComoving'][~bool_star_this_info_lc]
                del Merger_this_info_lc, Merger_prev_main_this_info_lc

                if plot:
                    # select the halos in the light cones
                    pos_choice = Merger_lc['InterpolatedPosition']

                    # selecting thin slab
                    pos_x_min = -Lbox / 2. + k * (Lbox / n_chunks)
                    pos_x_max = pos_x_min + (Lbox / n_chunks)

                    ijk = 0
                    choice = (pos_choice[:, ijk] >=
                              pos_x_min) & (pos_choice[:, ijk] < pos_x_max)

                    circle_this = plt.Circle((origins[0][1], origins[0][2]),
                                             radius=chi_this,
                                             color="g",
                                             fill=False)
                    circle_prev = plt.Circle((origins[0][1], origins[0][2]),
                                             radius=chi_prev,
                                             color="r",
                                             fill=False)

                    # clear things for fresh plot
                    ax = plt.gca()
                    ax.cla()

                    # plot particles
                    ax.scatter(pos_choice[choice, 1],
                               pos_choice[choice, 2],
                               s=0.1,
                               alpha=1.,
                               color="dodgerblue")

                    # circles for in and prev
                    ax.add_artist(circle_this)
                    ax.add_artist(circle_prev)
                    plt.xlim([-1000, 3000])
                    plt.ylim([-1000, 3000])
                    plt.axis("equal")
                    plt.show()

                gc.collect()

                # split the eligibility array over three files for the three chunks it's made up of
                offset = 0
                for idx in inds_fn_prev:
                    eligibility_prev_idx = eligibility_prev[
                        offset:offset + N_halos_slabs_prev[idx]]
                    # combine current information with previously existing
                    if os.path.exists(
                            cat_lc_dir / "tmp" /
                        ("eligibility_prev_z%4.3f_lc%d.%02d.npy" %
                         (z_prev, o, idx))):
                        eligibility_prev_old = np.load(
                            cat_lc_dir / "tmp" /
                            ("eligibility_prev_z%4.3f_lc%d.%02d.npy" %
                             (z_prev, o, idx)))
                        eligibility_prev_idx = eligibility_prev_old & eligibility_prev_idx
                        print("Exists!")
                    else:
                        print("Doesn't exist")
                    np.save(
                        cat_lc_dir / "tmp" /
                        ("eligibility_prev_z%4.3f_lc%d.%02d.npy" %
                         (z_prev, o, idx)), eligibility_prev_idx)
                    offset += N_halos_slabs_prev[idx]

                # write as table the information about halos that are part of next loaded redshift
                save_asdf(Merger_next,
                          ("Merger_next_z%4.3f_lc%d.%02d" % (z_prev, o, k)),
                          header, cat_lc_dir / "tmp")

                # save redshift of catalog that is next to load and difference in comoving between this and prev
                # TODO: save as txt file that gets appended to and then read the last line
                with open(cat_lc_dir / "tmp" / "tmp.log", "a") as f:
                    f.writelines([
                        "# Next iteration: \n",
                        "z_prev = %.8f \n" % z_prev,
                        "delta_chi = %.8f \n" % delta_chi,
                        "light_cone = %d \n" % o,
                        "super_slab = %d \n" % k
                    ])

            del Merger_this, Merger_prev

        # update values for difference in comoving distance
        delta_chi_old = delta_chi
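
# `solve_crossing`, called above, is not reproduced in this listing. A minimal sketch of
# the underlying idea, assuming straight-line motion between the two epochs and ignoring
# the periodic wrapping the real code must handle; the function name and return
# convention are illustrative only.
import numpy as np

def solve_crossing_sketch(chi_prev, chi_this, d_prev, d_this, pos_prev, pos_this):
    """Find where linearly interpolated halo trajectories cross the light cone.

    chi_prev, chi_this : comoving distance of the light cone at the previous and
        current merger tree epochs (chi_prev > chi_this)
    d_prev, d_this     : comoving distance of each halo from the observer at those epochs
    pos_prev, pos_this : (N, 3) halo positions at those epochs
    """
    # fraction of the way from the current epoch back towards the previous one
    t = (d_this - chi_this) / ((chi_prev - chi_this) - (d_prev - d_this))
    t = np.clip(t, 0.0, 1.0)
    chi_star = chi_this + t * (chi_prev - chi_this)
    pos_star = pos_this + t[:, None] * (pos_prev - pos_this)
    # True where the crossing lies closer to the current epoch than to the previous one
    closer_to_this = t < 0.5
    return chi_star, pos_star, closer_to_this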
Exemplo n.º 5
0
def main(sim_name,
         z_start,
         z_stop,
         compaso_parent,
         catalog_parent,
         merger_parent,
         save_pos=False,
         purge=False,
         complete=False,
         want_subsample_B=True):

    compaso_parent = Path(compaso_parent)
    catalog_parent = Path(catalog_parent)
    merger_parent = Path(merger_parent)

    # directory where the CompaSO halo catalogs are saved
    cat_dir = compaso_parent / sim_name / "halos"
    clean_dir = compaso_parent / "cleaning" / sim_name

    # obtain the redshifts of the CompaSO catalogs
    redshifts = glob.glob(os.path.join(cat_dir, "z*"))
    zs_cat = [extract_redshift(redshifts[i]) for i in range(len(redshifts))]

    # directory where we save the final outputs
    cat_lc_dir = catalog_parent / "halo_light_cones" / sim_name

    # directory where the merger tree files are kept
    merger_dir = merger_parent / sim_name

    # if merger tree redshift information has been saved, load it (if not, save it)
    if not os.path.exists(Path("data_mt") / sim_name / "zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        os.makedirs(Path("data_mt") / sim_name, exist_ok=True)
        np.save(Path("data_mt") / sim_name / "zs_mt.npy", zs_mt)
    zs_mt = np.load(Path("data_mt") / sim_name / "zs_mt.npy")

    # names of the merger tree file for a given redshift
    merger_fns = list(merger_dir.glob("associations_z%4.3f.*.asdf" % zs_mt[0]))

    # number of superslabs
    n_superslabs = len(merger_fns)
    print("number of superslabs = ", n_superslabs)

    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done (or make sure presaved can be matched with simulation)
    if not os.path.exists(
            Path("data_headers") / sim_name /
            "coord_dist.npy") or not os.path.exists(
                Path("data_headers") / sim_name /
                "redshifts.npy") or not os.path.exists(
                    Path("data_headers") / sim_name / "eta_drift.npy"):
        zs_all, steps_all, chis_all, etad_all = get_lc_info(
            Path("all_headers") / sim_name)
        os.makedirs(Path("data_headers") / sim_name, exist_ok=True)
        np.save(Path("data_headers") / sim_name / "redshifts.npy", zs_all)
        np.save(Path("data_headers") / sim_name / "steps.npy", steps_all)
        np.save(Path("data_headers") / sim_name / "coord_dist.npy", chis_all)
        np.save(Path("data_headers") / sim_name / "eta_drift.npy", etad_all)
    zs_all = np.load(Path("data_headers") / sim_name / "redshifts.npy")
    chis_all = np.load(Path("data_headers") / sim_name / "coord_dist.npy")
    etad_all = np.load(Path("data_headers") / sim_name / "eta_drift.npy")
    zs_all[-1] = float(
        "%.1f" % zs_all[-1]
    )  # LHG: I guess this is trying to match up to some filename or something?

    # fields to copy directly from the halo_info files
    raw_dic = {}
    with asdf.open(
            str(cat_dir / ("z%.3f" % zs_cat[0]) / 'halo_info' /
                'halo_info_000.asdf')) as f:
        for key in f['data'].keys():
            if 'L2' not in key: continue
            try:
                raw_dic[key] = (f['data'][key].dtype, f['data'][key].shape[1])
            except IndexError:  # 1D (scalar-per-halo) field has no second axis
                raw_dic[key] = f['data'][key].dtype
        header = f['header']  # just for getting the name of the redshift

    # just for testing; remove for final version
    if want_subsample_B:
        fields_cat = [
            'npstartA', 'npoutA', 'npstartB', 'npoutB', 'N', 'v_L2com',
            'x_L2com'
        ]  #, 'id', 'x_L2com', 'sigmav3d_L2com', 'r90_L2com', 'r25_L2com']
        subsample_str = 'AB'
    else:
        fields_cat = ['npstartA', 'npoutA', 'N', 'v_L2com', 'x_L2com'
                      ]  #, 'id', 'sigmav3d_L2com', 'r90_L2com', 'r25_L2com']
        subsample_str = 'A'

    # main progenitor fields of interest
    fields_cat_mp = [
        'haloindex', 'haloindex_mainprog', 'v_L2com_mainprog', 'N_mainprog'
    ]

    # get functions relating chi and z
    chi_of_z = interp1d(zs_all, chis_all)
    etad_of_chi = interp1d(chis_all, etad_all)
    z_of_chi = interp1d(chis_all, zs_all)

    # initial redshift where we start building the trees
    ind_start = np.argmin(np.abs(zs_mt - z_start))
    ind_stop = np.argmin(np.abs(zs_mt - z_stop))

    # directory where we save the current state
    os.makedirs(cat_lc_dir / "tmp", exist_ok=True)
    if purge:
        # delete the existing temporary files
        tmp_files = list((cat_lc_dir / "tmp").glob("haloindex_*"))
        for i in range(len(tmp_files)):
            os.unlink(str(tmp_files[i]))

    # loop over each merger tree redshift
    for i in range(ind_start, ind_stop + 1):

        # starting snapshot
        z_mt = zs_mt[i]
        z_mt_mp = zs_mt[i + 1]
        z_cat = zs_cat[np.argmin(np.abs(z_mt - zs_cat))]
        print("Redshift = %.3f %.3f" % (z_mt, z_cat))

        # the names of the folders need to be standardized
        zname_mt = min(header['L1OutputRedshifts'],
                       key=lambda z: abs(z - z_mt))

        # convert the redshifts into comoving distance
        chi_mt = chi_of_z(z_mt)
        chi_mt_mp = chi_of_z(z_mt_mp)

        # catalog directory
        catdir = cat_dir / ("z%.3f" % z_cat)

        # names of the merger tree file for this redshift
        merger_fns = list(merger_dir.glob("associations_z%4.3f.*.asdf" % z_mt))
        for counter in range(len(merger_fns)):
            merger_fns[counter] = str(merger_fns[counter])

        # slab indices and number of halos per slab
        N_halo_slabs, slabs = get_halos_per_slab(merger_fns, minified=False)
        N_halo_total = np.sum(N_halo_slabs)

        # names of the light cone merger tree file for this redshift
        merger_lc_fns = list(
            (cat_lc_dir / ("z%.3f" % zname_mt)).glob("Merger_lc*.asdf"))
        for counter in range(len(merger_lc_fns)):
            merger_lc_fns[counter] = str(merger_lc_fns[counter])

        # slab indices, origins and number of halos per slab
        N_halo_slabs_lc, slabs_lc, origins_lc = get_halos_per_slab_origin(
            merger_lc_fns, minified=False)

        # total number of halos in this light cone redshift
        N_lc = np.sum(N_halo_slabs_lc)
        print("total number of lc halos = ", N_lc)
        if N_lc == 0: continue

        # create a new dictionary with translations of merger names
        key_dic = {
            'HaloIndex': ['index_halo', np.int64],
            'InterpolatedPosition': ['pos_interp', (np.float32, 3)],
            'InterpolatedVelocity': ['vel_interp', (np.float32, 3)],
            'InterpolatedComoving': ['redshift_interp', np.float32],
            'LightConeOrigin': ['origin', np.int8],
        }

        # Merger_lc should have all fields (compaso + mainprog (not anymore) + interpolated)
        cols = {
            fields_cat[i]: np.zeros(N_lc, dtype=(user_dt[fields_cat[i]]))
            for i in range(len(fields_cat))
        }
        fields = []
        for i in range(len(fields_cat)):
            fields.append(fields_cat[i])

        # additional fields for the light cones
        for key in key_dic.keys():
            cols[key_dic[key][0]] = np.zeros(N_lc, dtype=key_dic[key][1])

        # updating the mainprog here
        with asdf.open(
                str(clean_dir / ("z%.3f" % z_cat) / 'cleaned_halo_info' /
                    'cleaned_halo_info_000.asdf')) as f:  # og
            # add mainprog stuff to the raw dictionary
            for key in fields_cat_mp:
                try:
                    raw_dic[key] = (f['data'][key].dtype,
                                    f['data'][key].shape[1])
                except IndexError:  # 1D (scalar-per-halo) field has no second axis
                    raw_dic[key] = f['data'][key].dtype

        # adding the raw halo info fields
        for key in raw_dic.keys():
            cols[key] = np.zeros(N_lc, dtype=raw_dic[key])
        # adding interpolated mass
        cols['N_interp'] = np.zeros(N_lc, dtype=user_dt['N'])
        Merger_lc = Table(cols, copy=False)

        # if we want to complete to z = 0, then turn on complete for z = 0.1 (we don't have shells past that)
        if complete and np.abs(z_mt - 0.1) < 1.e-3:
            save_z0 = True
        else:
            save_z0 = False

        # initialize index for filling halo information
        start = 0
        file_no = 0

        # offset for correcting halo indices
        offset = 0

        # counts particles
        count = 0

        # loop over each superslab
        for k in range(n_superslabs):
            # assert superslab number is correct
            assert slabs[k] == k, "the superslabs are not matching"

            # origins for which information is available
            origins_k = origins_lc[slabs_lc == k]

            if len(origins_k) == 0:
                # offset all halos in given superslab
                offset += N_halo_slabs[k]
                continue

            # list of halo indices
            halo_info_list = []
            for i in [0, 1, -1]:
                # TESTING depending on whether B particles are in normal location or sownak's or mine
                #halo_info_list.append(str(catdir / 'halo_info' / ('halo_info_%03d.asdf'%((k+i)%n_superslabs)))) # og
                #halo_info_list.append(str(Path("/global/cscratch1/sd/sbose/subsample_B_particles") / sim_name / "halos"/ ("z%.3f"%z_cat) / 'halo_info' / ('halo_info_%03d.asdf'%((k+i)%n_superslabs)))) # this one is different and is used when working with cleaning/cleaned_halos (i.e. the particles are kept elsewhere)
                halo_info_list.append(
                    str(
                        Path(
                            "/global/cscratch1/sd/boryanah/data_hybrid/tape_data"
                        ) / sim_name / "halos" / ("z%.3f" % z_cat) /
                        'halo_info' / ('halo_info_%03d.asdf' %
                                       ((k + i) % n_superslabs)))
                )  # in case sownak deletes those particles
            # adding merger tree fields
            cleaned_halo_info_list = []
            for i in [0, 1, -1]:
                cleaned_halo_info_list.append(
                    str(clean_dir / ("z%.3f" % z_cat) / 'cleaned_halo_info' /
                        ('cleaned_halo_info_%03d.asdf' %
                         ((k + i) % n_superslabs))))

            print("loading halo info files = ", halo_info_list)
            print("loading fields = ", fields)
            # load the CompaSO catalogs
            if (save_pos or save_z0):
                try:
                    cat = CompaSOHaloCatalog(
                        halo_info_list,
                        load_subsamples=f'{subsample_str:s}_halo_all',
                        fields=fields,
                        unpack_bits=False)
                    loaded_pos = True
                except Exception:  # fall back to pid-only subsamples if positions are unavailable
                    cat = CompaSOHaloCatalog(
                        halo_info_list,
                        load_subsamples=f'{subsample_str:s}_halo_pid',
                        fields=fields,
                        unpack_bits=False)
                    loaded_pos = False
            else:
                cat = CompaSOHaloCatalog(
                    halo_info_list,
                    load_subsamples=f'{subsample_str:s}_halo_pid',
                    fields=fields,
                    unpack_bits=False,
                    cleandir=str(compaso_parent / "cleaning"))
                #cat = CompaSOHaloCatalog(halo_info_list, load_subsamples=f'{subsample_str:s}_halo_pid', fields=fields, unpack_bits=False, cleaned=False)
                loaded_pos = False

            # load the rest of the parameters in compressed format
            cols = {}
            for key in raw_dic.keys():
                cols[key] = np.zeros(len(cat.halos), dtype=raw_dic[key])
            compressed_data = Table(cols, copy=False)
            new_count = 0
            for i in range(len(halo_info_list)):
                with asdf.open(halo_info_list[i]) as f:
                    for key in f['data'].keys():
                        if key in compressed_data.keys():
                            compressed_data[key][new_count:new_count +
                                                 len(f['data'][key]
                                                     )] = f['data'][key][:]
                    new_count += len(f['data'][key])
            # adding merger tree fields
            new_count = 0
            for i in range(len(cleaned_halo_info_list)):
                with asdf.open(cleaned_halo_info_list[i]) as f:
                    for key in f['data'].keys():
                        if key in fields_cat_mp:
                            compressed_data[key][new_count:new_count +
                                                 len(f['data'][key]
                                                     )] = f['data'][key][:]
                    new_count += len(f['data'][key])

            # loop over each observer origin
            for o in origins_k:

                # number of halos in this file
                num = N_halo_slabs_lc[file_no]
                file_no += 1

                print("origin, superslab, N_halo_slabs_lc", o, k, num)
                # skip if none
                if num == 0: continue

                # load the light cone arrays
                with asdf.open(cat_lc_dir / ("z%.3f" % zname_mt) /
                               ("Merger_lc%d.%02d.asdf" % (o, k)),
                               lazy_load=True,
                               copy_arrays=True) as f:
                    merger_lc = f['data']

                # the files should be congruent
                N_halo_lc = len(merger_lc['HaloIndex'])
                assert N_halo_lc == num, "file order is messed up"

                # translate information from this file to the complete array
                for key in merger_lc.keys():
                    Merger_lc[key_dic[key][0]][start:start +
                                               num] = merger_lc[key][:]

                # adding information about which lightcone the halo belongs to
                Merger_lc['origin'][start:start + num] = np.repeat(
                    o, num).astype(np.int8)

                # halo index and velocity
                halo_ind_lc = Merger_lc['index_halo'][start:start + num]
                halo_ind_lc = correct_all_inds(halo_ind_lc, N_halo_slabs,
                                               slabs, n_superslabs)
                halo_ind_lc = (halo_ind_lc - offset) % N_halo_total
                vel_interp_lc = Merger_lc['vel_interp'][start:start + num]

                # correct halo indices
                correction = N_halo_slabs[k] + N_halo_slabs[
                    (k + 1) % n_superslabs] + N_halo_slabs[
                        (k - 1) % n_superslabs] - N_halo_total
                halo_ind_lc[halo_ind_lc > N_halo_total -
                            N_halo_slabs[(k - 1) % n_superslabs]] += correction

                # cut the halos that are not part of this catalog from the halo table
                halo_table = cat.halos[halo_ind_lc]

                header = cat.header
                N_halos = len(cat.halos)
                print("N_halos = ", N_halos)
                assert N_halos == N_halo_total + correction, "mismatch between halo number in compaso catalog and in merger tree"

                # cut the halos that are not part of this catalog from the compressed data
                compressed_data_o = compressed_data[halo_ind_lc]

                # load eligibility information if it exists
                if os.path.exists(cat_lc_dir / "tmp" /
                                  ("haloindex_z%4.3f_lc%d.%02d.npy" %
                                   (z_mt, o, k))):
                    haloindex_ineligible = np.load(
                        cat_lc_dir / "tmp" /
                        ("haloindex_z%4.3f_lc%d.%02d.npy" % (z_mt, o, k)))

                    # find the halos in halo_table that have been marked ineligible and get rid of them
                    mask_ineligible = np.in1d(compressed_data_o['haloindex'],
                                              haloindex_ineligible)

                    # dropping these rows outright would break the particle/halo indexing
                    # (which uses num and the total particle count), so the ineligible halos
                    # are zeroed out below instead
                    #halo_table = halo_table[mask_ineligible]
                    halo_table['N'][mask_ineligible] = 0
                    halo_table['npstartA'][
                        mask_ineligible] = -999  # note unsigned integer
                    halo_table['npoutA'][mask_ineligible] = 0
                    if want_subsample_B:
                        halo_table['npstartB'][
                            mask_ineligible] = -999  # note unsigned integer
                        halo_table['npoutB'][mask_ineligible] = 0
                    print(
                        "percentage surviving halos after eligibility = ",
                        100. *
                        (1 - np.sum(mask_ineligible) / len(mask_ineligible)))

                # load the particle ids
                pid = cat.subsamples['pid']
                if (save_pos or save_z0) and loaded_pos:
                    pos = cat.subsamples['pos']
                    vel = cat.subsamples['vel']

                # reindex npstart and npout for the new catalogs
                npstartA = halo_table['npstartA']
                npoutA = halo_table['npoutA']
                # select the pids in this halo light cone, and index into them starting from 0
                if want_subsample_B:
                    npstartB = halo_table['npstartB']
                    npoutB = halo_table['npoutB']

                    if (save_pos or save_z0) and loaded_pos:
                        pid_new, pos_new, vel_new, npstart_new, npout_new, npout_new_B = reindex_pid_pos_vel_AB(
                            pid, pos, vel, npstartA, npoutA, npstartB, npoutB)
                        del pid, pos, vel
                    else:
                        pid_new, npstart_new, npout_new, npout_new_B = reindex_pid_AB(
                            pid, npstartA, npoutA, npstartB, npoutB)
                        del pid
                    del npstartA, npoutA, npstartB, npoutB
                else:
                    if (save_pos or save_z0) and loaded_pos:
                        pid_new, pos_new, vel_new, npstart_new, npout_new = reindex_pid_pos_vel(
                            pid, pos, vel, npstartA, npoutA)
                        del pid, pos, vel
                    else:
                        pid_new, npstart_new, npout_new = reindex_pid(
                            pid, npstartA, npoutA)
                        del pid
                    del npstartA, npoutA

                # assert that indexing is right
                if want_subsample_B:
                    assert np.sum(npout_new + npout_new_B) == len(
                        pid_new), "mismatching indexing"
                else:
                    assert np.sum(npout_new) == len(
                        pid_new), "mismatching indexing"

                # offset for this superslab and origin
                Merger_lc['npstartA'][start:start + num] = npstart_new + count
                Merger_lc['npoutA'][start:start + num] = npout_new
                if want_subsample_B:
                    Merger_lc['npoutB'][start:start + num] = npout_new_B
                    del npout_new_B
                del npstart_new, npout_new

                # increment number of particles in superslab and origin
                count += len(pid_new)

                # create particle array
                if (save_pos or save_z0) and loaded_pos:
                    pid_table = Table({
                        'pid':
                        np.zeros(len(pid_new), pid_new.dtype),
                        'pos':
                        np.zeros((len(pid_new), 3), pos_new.dtype),
                        'vel':
                        np.zeros((len(pid_new), 3), vel_new.dtype)
                    })
                    pid_table['pid'] = pid_new
                    pid_table['pos'] = pos_new
                    pid_table['vel'] = vel_new
                    del pid_new, pos_new, vel_new
                else:
                    pid_table = Table(
                        {'pid': np.zeros(len(pid_new), pid_new.dtype)})
                    pid_table['pid'] = pid_new
                    del pid_new
                # save the particles
                save_asdf(pid_table, "pid_lc%d.%02d" % (o, k), header,
                          cat_lc_dir / ("z%4.3f" % zname_mt))
                del pid_table

                # for halos that were not interpolated, take the velocity from the halo info files
                not_interp = np.sum(np.abs(vel_interp_lc), axis=1) < 1.e-6
                print("percentage not interpolated = ",
                      100. * np.sum(not_interp) / len(not_interp))
                vel_interp_lc[not_interp] = halo_table['v_L2com'][not_interp]

                # halos with merger tree info (0 for merged or smol, -999 for no info)
                mask_info = compressed_data_o['haloindex_mainprog'][:] > 0
                print("percentage without merger tree info = ",
                      100. * (1. - np.sum(mask_info) / len(mask_info)))
                print("percentage of removed halos = ",
                      np.sum(halo_table['N'] == 0) * 100. / len(mask_info))
                # It may be that at later redshifts (i.e. not the z_start of build_mt) we carry halos
                # from past times, so a halo can have had merger tree info at some point and lost it;
                # there is also the new condition of going back half a lifetime. The first number can
                # exceed the sum of the other two because it also contains other cases (e.g. splits).
                assert np.sum(~mask_info) >= np.sum(not_interp) + np.sum(
                    halo_table['N'] == 0
                ), "Different number of halos with merger tree info and halos that have been interpolated"
                del not_interp

                # interpolated velocity v = v1 + (v2-v1)/(chi1-chi2)*(chi-chi2) because -d(chi) = d(eta)
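                # worked example with illustrative numbers (not from the data): a main progenitor
                # with v = 100 km/s at chi_mt_mp = 1100 Mpc/h and a halo with v = 120 km/s at
                # chi_mt = 1000 Mpc/h, crossing the light cone at chi = 1040 Mpc/h, gets
                # v* = 100 + (120 - 100)/(1100 - 1000) * (1100 - 1040) = 112 km/s, which is the
                # same linear-in-chi interpolation that a_avg and v_star implement below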
                a_avg = (halo_table['v_L2com'] -
                         compressed_data_o['v_L2com_mainprog']) / (chi_mt_mp -
                                                                   chi_mt)
                v_star = compressed_data_o['v_L2com_mainprog'] + a_avg * (
                    chi_mt_mp - merger_lc['InterpolatedComoving'][:, None])
                vel_interp_lc[mask_info] = v_star[mask_info]
                del a_avg, v_star

                # save the velocity information
                Merger_lc['vel_interp'][start:start + num] = vel_interp_lc
                del vel_interp_lc

                # interpolated mass m = m1 + (m2-m1)/(chi1-chi2)*(chi-chi2) because dt = -dchi
                # compute the derivative
                try:
                    mdot = (halo_table['N'].astype(float) -
                            compressed_data_o['N_mainprog'][:, 0].astype(float)
                            ) / (chi_mt_mp - chi_mt)
                    m_star = compressed_data_o['N_mainprog'][:, 0].astype(
                        float) + mdot * (chi_mt_mp -
                                         merger_lc['InterpolatedComoving'])
                except IndexError:
                    # this is only needed if you are using the last available redshift for which N_mainprog is 1D
                    mdot = (halo_table['N'].astype(float) -
                            compressed_data_o['N_mainprog'].astype(float)) / (
                                chi_mt_mp - chi_mt)
                    m_star = compressed_data_o['N_mainprog'].astype(
                        float) + mdot * (chi_mt_mp -
                                         merger_lc['InterpolatedComoving'])

                # getting rid of negative masses which occur for halos with mass today = 0 or halos that come from the previous redshift (i.e. 1/2 to 1 and not 1 to 3/2)
                m_star[m_star < 0.] = 0.
                m_star = np.round(m_star).astype(halo_table['N'].dtype)
                # record the interpolated mass for each halo
                Merger_lc['N_interp'][start:start +
                                      num][mask_info] = m_star[mask_info]

                # mark the halos that don't have merger tree info
                Merger_lc['origin'][start:start + num][~mask_info] += 3

                # for these halos, we can pseudo interpolate their position but keep the mass unchanged
                Merger_lc['N_interp'][start:start + num][
                    ~mask_info] = halo_table['N'][~mask_info]
                # buba's try
                #Merger_lc['pos_interp'][start:start+num][~mask_info] = merger_lc['InterpolatedPosition'][~mask_info]# + halo_table['v_L2com'][~mask_info]*(chi_mt - merger_lc['InterpolatedComoving'][:, None])[~mask_info]
                # A simulation particle with canonical velocity v1 drifting from z1 to z2 has its
                # position advanced as x2 = x1 + v1*(etaD(z2) - etaD(z1)). The etaD's are the drift
                # factors, computed as Delta etaD = \int_t1^t2 dt/a^2 and stored in the state headers,
                # with velocities in canonical units and x1, x2 in unit-box comoving coords
                # (a standalone drift sketch appears after this example).
                tmp = (merger_lc['InterpolatedComoving'][~mask_info])
                tmp[tmp < np.min(chis_all)] = np.min(chis_all)
                merger_lc['InterpolatedComoving'][~mask_info] = tmp
                del tmp
                Merger_lc['pos_interp'][start:start + num][~mask_info] = (
                    merger_lc['InterpolatedPosition'][~mask_info] /
                    header['BoxSizeHMpc'] +
                    compressed_data_o['v_L2com'][~mask_info] *
                    header['VelZSpace_to_Canonical'] *
                    (etad_of_chi(merger_lc['InterpolatedComoving'][~mask_info,
                                                                   None]) -
                     etad_of_chi(chi_mt))) * header['BoxSizeHMpc']
                # + halo_table['v_L2com'][~mask_info]*(chi_mt - merger_lc['InterpolatedComoving'][:, None])[~mask_info]

                # units -- todo: test
                del m_star, mdot

                # copy the rest of the halo fields
                for key in fields_cat:
                    # from the CompaSO fields, those have already been reindexed
                    if key == 'npstartA' or key == 'npoutA': continue
                    if key == 'npstartB' or key == 'npoutB': continue
                    Merger_lc[key][start:start + num] = halo_table[key][:]

                # copy all L2com compressed fields to Merger_lc
                for key in compressed_data.keys():
                    Merger_lc[key][start:start +
                                   num] = compressed_data_o[key][:]

                # save information about halos that were used in this catalog and have merger tree information
                np.save(
                    cat_lc_dir / "tmp" / ("haloindex_z%4.3f_lc%d.%02d.npy" %
                                          (z_mt_mp, o, k)),
                    compressed_data_o['haloindex_mainprog'][mask_info])
                del mask_info
                del halo_table

                # add halos in this file
                start += num

            # offset all halos in given superslab
            offset += N_halo_slabs[k]
            del cat

        assert len(Merger_lc['redshift_interp']
                   ) == start, "Are you missing some halos?"
        # clamp interpolated comoving distances that fall below the tabulated range
        # (this happens for some halos near z = 0.1) before converting to redshift
        Merger_lc['redshift_interp'][
            Merger_lc['redshift_interp'] < np.min(chis_all)] = np.min(chis_all)
        Merger_lc['redshift_interp'] = z_of_chi(
            Merger_lc['redshift_interp']).astype(np.float32)

        # save to files
        save_asdf(Merger_lc, "halo_info_lc", header,
                  cat_lc_dir / ("z%4.3f" % zname_mt))
        del Merger_lc

        # loop over each superslab
        file_no = 0
        offset = 0
        for k in range(n_superslabs):
            # origins for which information is available
            origins_k = origins_lc[slabs_lc == k]

            # loop over each observer origin
            for o in origins_k:

                with asdf.open(cat_lc_dir / ("z%4.3f" % zname_mt) /
                               ("pid_lc%d.%02d.asdf" % (o, k)),
                               lazy_load=True,
                               copy_arrays=True) as f:
                    pid_lc = f['data']['pid'][:]
                    if (save_pos or save_z0) and loaded_pos:
                        pos_lc = f['data']['pos'][:]
                        vel_lc = f['data']['vel'][:]
                if file_no == 0:
                    if (save_pos or save_z0) and loaded_pos:
                        pid_table = Table({
                            'pid': np.zeros(count, pid_lc.dtype),
                            'pos': np.zeros((count, 3), pos_lc.dtype),
                            'vel': np.zeros((count, 3), vel_lc.dtype),
                        })
                    else:
                        pid_table = Table({'pid': np.zeros(count, pid_lc.dtype)})

                pid_table['pid'][offset:offset + len(pid_lc)] = pid_lc
                if (save_pos or save_z0) and loaded_pos:
                    pid_table['pos'][offset:offset + len(pid_lc)] = pos_lc
                    pid_table['vel'][offset:offset + len(pid_lc)] = vel_lc
                file_no += 1
                offset += len(pid_lc)
        assert offset == count, "Missing particles somewhere"
        save_asdf(pid_table, "pid_lc", header,
                  cat_lc_dir / ("z%4.3f" % zname_mt))

        gc.collect()
Exemplo n.º 6
0
def main(sim_name,
         z_lowest,
         z_highest,
         light_cone_parent,
         catalog_parent,
         merger_parent,
         resume=False,
         want_subsample_B=True):
    """
    Main algorithm: for each step in the light cone files, figure out the two closest halo light cone catalogs and load relevant information
    from these into the "currently_loaded" lists (if the step is far from the midpoint between the two, load only a single redshift catalog). 
    Then figure out which are the step files associated with the current step (1 to 3) and load the redshift catalogs corresponding to this
    step (1 or 2) from the "currently_loaded" lists. Then consider all combinations of light cone origins and redshift catalog origins 
    (the largest overlap will be for 0 and 0, 1 and 1, 2 and 2, but there will be a small number of halos on the boundary between the 
    original box and the two copies, so this is an effort to find particles that have migrated across the border). To speed up the process
    of matching the halo particles to the light cone particles, we have included another condition that selects only those particles in the
    halo light cone catalog that are a distance from the observer of only +/- 10 Mpc/h around the mean comoving distance of the current step.
    """
    # turn light cone, halo catalog and merger tree paths into Path objects
    light_cone_parent = Path(light_cone_parent)
    catalog_parent = Path(catalog_parent)
    merger_parent = Path(merger_parent)

    # directory where the merger tree files are kept
    merger_dir = merger_parent / sim_name
    header = get_one_header(merger_dir)

    # physical location of the observer (original box origin)
    observer_origin = (np.array(header['LightConeOrigins'],
                                dtype=np.float32).reshape(-1, 3))[0]
    print("observer origin = ", observer_origin)

    # simulation parameters
    Lbox = header['BoxSize']
    PPD = header['ppd']

    # directory where we have saved the final outputs from merger trees and halo catalogs
    cat_lc_dir = catalog_parent / "halo_light_cones" / sim_name

    # directory where light cones are saved
    lc_dir = light_cone_parent / sim_name / "lightcones"

    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done
    header_fns = ["redshifts.npy", "steps.npy", "coord_dist.npy", "eta_drift.npy"]
    if not all(os.path.exists(Path("data_headers") / sim_name / fn) for fn in header_fns):
        zs_all, steps_all, chis_all, etad_all = get_lc_info(
            Path("all_headers") / sim_name)
        os.makedirs(Path("data_headers") / sim_name, exist_ok=True)
        np.save(Path("data_headers") / sim_name / "redshifts.npy", zs_all)
        np.save(Path("data_headers") / sim_name / "steps.npy", steps_all)
        np.save(Path("data_headers") / sim_name / "coord_dist.npy", chis_all)
        np.save(Path("data_headers") / sim_name / "eta_drift.npy", etad_all)
    zs_all = np.load(Path("data_headers") / sim_name / "redshifts.npy")
    steps_all = np.load(Path("data_headers") / sim_name / "steps.npy")
    chis_all = np.load(Path("data_headers") / sim_name / "coord_dist.npy")
    etad_all = np.load(Path("data_headers") / sim_name / "eta_drift.npy")
    zs_all[-1] = float("%.1f" % zs_all[-1])

    # if merger tree redshift information has been saved, load it (if not, save it)
    if not os.path.exists(Path("data_mt") / sim_name / "zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        os.makedirs(Path("data_mt") / sim_name, exist_ok=True)
        np.save(Path("data_mt") / sim_name / "zs_mt.npy", zs_mt)
    zs_mt = np.load(Path("data_mt") / sim_name / "zs_mt.npy")

    # correct out of bounds error for interpolation
    zs_mt = zs_mt[(zs_mt <= zs_all.max()) & (zs_mt >= zs_all.min())]

    # time step of furthest and closest shell in the light cone files
    step_min = np.min(steps_all)
    step_max = np.max(steps_all)

    # get functions relating chi and z
    chi_of_z = interp1d(zs_all, chis_all)
    z_of_chi = interp1d(chis_all, zs_all)

    # conformal distance of the mtree catalogs
    chis_mt = chi_of_z(zs_mt)

    # Read light cone file names
    lc_rv_fns = sorted(glob.glob(os.path.join(lc_dir, 'rv/LightCone*')))
    lc_pid_fns = sorted(glob.glob(os.path.join(lc_dir, 'pid/LightCone*')))

    # select the final and initial step for computing the convergence map
    step_start = steps_all[np.argmin(np.abs(zs_all - z_highest))]
    step_stop = steps_all[np.argmin(np.abs(zs_all - z_lowest))]
    print("step_start = ", step_start)
    print("step_stop = ", step_stop)

    # these are the time steps associated with each of the light cone files
    step_fns = np.zeros(len(lc_pid_fns), dtype=int)
    for i in range(len(lc_pid_fns)):
        step_fns[i] = extract_steps(lc_pid_fns[i])

    # directory where we save the current state if we want to resume
    os.makedirs(cat_lc_dir / "tmp", exist_ok=True)
    if resume:
        # check if resuming from an old state
        infile = InputFile(cat_lc_dir / "tmp" / "match.log")
        z_last = infile.z_last
        assert (
            np.abs(z_last - z_highest) <= 2.e-1
        ), "Your recorded state is not for the currently requested redshift, can't resume from old. Last recorded state is z = %.3f" % z_last
    else:
        z_last = -1
        if os.path.exists(cat_lc_dir / "tmp" / "match.log"):
            os.unlink(cat_lc_dir / "tmp" / "match.log")

    # initialize previously loaded mt file name
    currently_loaded_zs = []
    currently_loaded_znames = []
    currently_loaded_headers = []
    currently_loaded_npouts = []
    currently_loaded_origin_edge = []
    currently_loaded_dist = []
    currently_loaded_pids = []
    currently_loaded_tables = []

    # loop through all selected steps
    for step in range(step_start, step_stop + 1):

        # adjust the indexing using j
        j = step - step_min
        step_this = steps_all[j]
        z_this = zs_all[j]
        chi_this = chis_all[j]
        assert step_this == step, "You've messed up the step counts"
        print("light cones step, redshift = ", step_this, z_this)

        # get the two redshifts it's straddling, their file names (of particles and halos), and their comoving values
        mt_fns, mt_zs, mt_znames, mt_chis, halo_mt_fns = get_mt_fns(
            z_this, zs_mt, chis_mt, cat_lc_dir, header)

        # get the mean chi
        mt_chi_mean = np.mean(mt_chis)

        # how many shells are we including on both sides, including mid point (total of 2 * buffer_no + 1)
        buffer_no = 2  # 1 should be enough and spares time, but it turns out we sometimes miss halos with 1

        # is this step close to the bridge (midpoint) between the two straddling redshifts?
        mid_bool = (np.argmin(np.abs(mt_chi_mean - chis_all))
                    <= j + buffer_no) & (np.argmin(
                        np.abs(mt_chi_mean - chis_all)) >= j - buffer_no)
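        # i.e. keep both catalogs only when the shell closest to the midpoint between
        # the two epochs lies within buffer_no steps of the current shell index j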

        # TESTING
        #mid_bool = True

        # if not in between two redshifts, we just need one catalog -- the one it is closest to; else keep both
        if not mid_bool:
            mt_fns = [mt_fns[np.argmin(np.abs(mt_chis - chi_this))]]
            halo_mt_fns = [halo_mt_fns[np.argmin(np.abs(mt_chis - chi_this))]]
            mt_zs = [mt_zs[np.argmin(np.abs(mt_chis - chi_this))]]
        print("using redshifts = ", mt_zs)

        # if we have loaded two zs but are only using one, that means that we are past the mid-point and can record the first one
        if len(currently_loaded_zs) > len(mt_zs):
            print("We will be dismissing z = ", mt_zs[0])
            dismiss = True
        else:
            dismiss = False

        # load the two (or one) straddling merger tree files and store them in the currently loaded lists; write one of them out if it is time
        for i in range(len(mt_fns)):

            # discard the old redshift catalog and record its data
            if dismiss:
                # check whether we are resuming and whether this is the redshift last written into the log file
                if resume and np.abs(currently_loaded_zs[0] - z_last) < 1.e-6:
                    print(
                        "This redshift (z = %.3f) has already been recorded, skipping"
                        % z_last)
                else:
                    # save the information about that redshift into asdf file
                    # TESTING
                    save_asdf(
                        currently_loaded_tables[0], "pid_rv_lc",
                        currently_loaded_headers[0],
                        cat_lc_dir / ("z%4.3f" % currently_loaded_znames[0]))
                    print("saved catalog = ", currently_loaded_zs[0])

                    # record the write-out into the log file
                    with open(cat_lc_dir / "tmp" / "match.log", "a") as f:
                        f.writelines([
                            "# Last saved redshift: \n",
                            "z_last = %.8f \n" % currently_loaded_zs[0]
                        ])

                # discard this first entry (aka the one being written out) from the lists of currently loaded things
                currently_loaded_zs = currently_loaded_zs[1:]
                currently_loaded_znames = currently_loaded_znames[1:]
                currently_loaded_headers = currently_loaded_headers[1:]
                currently_loaded_pids = currently_loaded_pids[1:]
                currently_loaded_origin_edge = currently_loaded_origin_edge[1:]
                currently_loaded_dist = currently_loaded_dist[1:]
                currently_loaded_npouts = currently_loaded_npouts[1:]
                currently_loaded_tables = currently_loaded_tables[1:]
                gc.collect()

            # check if catalog is already loaded and don't load if so
            if mt_zs[i] in currently_loaded_zs:
                print("skipped loading catalog ", mt_zs[i])
                continue

            # load new merger tree catalog
            halo_mt_npout = load_mt_npout(halo_mt_fns[i])
            if want_subsample_B:
                halo_mt_npout += load_mt_npout_B(halo_mt_fns[i])
            halo_mt_origin_edge = load_mt_origin_edge(halo_mt_fns[i], Lbox)
            mt_origin_edge = np.repeat(halo_mt_origin_edge,
                                       halo_mt_npout,
                                       axis=0)
            del halo_mt_origin_edge
            gc.collect()
            halo_mt_dist = load_mt_dist(halo_mt_fns[i], observer_origin)
            mt_dist = np.repeat(halo_mt_dist, halo_mt_npout, axis=0)
            del halo_mt_dist
            # remove npouts unless applying Lehman's idea
            del halo_mt_npout
            gc.collect()
            mt_pid, header = load_mt_pid(mt_fns[i], Lbox, PPD)

            # start the light cones table for this redshift
            lc_table_final = Table({
                # could optimize further with np.empty here, but need to think about what pid == 0 means
                'pid': np.zeros(len(mt_pid), dtype=mt_pid.dtype),
                'pos': np.empty(len(mt_pid), dtype=(np.float32, 3)),
                'vel': np.empty(len(mt_pid), dtype=(np.float32, 3)),
                #'redshift': np.zeros(len(mt_pid), dtype=np.float16),
            })

            # append the newly loaded catalog
            currently_loaded_zs.append(mt_zs[i])
            currently_loaded_znames.append(mt_znames[i])
            currently_loaded_headers.append(header)
            currently_loaded_pids.append(mt_pid)
            currently_loaded_dist.append(mt_dist)
            currently_loaded_origin_edge.append(mt_origin_edge)
            currently_loaded_tables.append(lc_table_final)
            # Useful for Lehman's
            #currently_loaded_npouts.append(halo_mt_npout)
        print("currently loaded redshifts = ", currently_loaded_zs)

        # find all light cone file names that correspond to this time step
        choice_fns = np.where(step_fns == step_this)[0]

        # number of light cones at this step
        num_lc = len(choice_fns)
        assert (num_lc <= 3) & (
            num_lc > 0
        ), "There should be between one and three light cone files corresponding to a given step"

        # loop through those one to three light cone files
        for choice_fn in choice_fns:
            print("light cones file = ", lc_pid_fns[choice_fn])

            # load particles in light cone
            lc_pid, lc_rv = load_lc_pid_rv(lc_pid_fns[choice_fn],
                                           lc_rv_fns[choice_fn], Lbox, PPD)

            # sorting to speed up the matching
            i_sort_lc_pid = np.argsort(lc_pid)
            lc_pid = lc_pid[i_sort_lc_pid]
            lc_rv = lc_rv[i_sort_lc_pid]
            del i_sort_lc_pid
            gc.collect()

            # what are the offsets for each of the origins
            if 'LightCone1' in lc_pid_fns[choice_fn]:
                offset_lc = np.array([0., 0., Lbox], dtype=np.float32)
                origin = 1
            elif 'LightCone2' in lc_pid_fns[choice_fn]:
                offset_lc = np.array([0., Lbox, 0.], dtype=np.float32)
                origin = 2
            else:
                offset_lc = np.array([0., 0., 0.], dtype=np.float32)
                origin = 0

            # loop over the one or two closest catalogs
            for i in range(len(mt_fns)):

                # define variables for each of the currently loaded lists
                which_mt = np.where(mt_zs[i] == currently_loaded_zs)[0]
                mt_pid = currently_loaded_pids[which_mt[0]]
                mt_origin_edge = currently_loaded_origin_edge[which_mt[0]]
                mt_dist = currently_loaded_dist[which_mt[0]]
                header = currently_loaded_headers[which_mt[0]]
                lc_table_final = currently_loaded_tables[which_mt[0]]
                mt_z = currently_loaded_zs[which_mt[0]]
                # useful for Lehman's
                #halo_mt_npout = currently_loaded_npouts[which_mt[0]]

                # which origins are available for this merger tree file
                origins = np.unique(
                    np.log2(mt_origin_edge).astype(np.int8) - 4)
                print("unqiue origins = ", origins)

                # additional condition to reduce the number of particles considered,
                # based on the spatial position of the halos relative to the light cone particles
                cond_dist = (mt_dist < chi_this + 10.) & (mt_dist >
                                                          chi_this - 10.)
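                # this is the +/- 10 Mpc/h preselection described in the docstring: only
                # particles whose halo lies within a thin shell around the comoving
                # distance of this step can match particles in this light cone file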
                del mt_dist
                gc.collect()
                if np.sum(cond_dist) == 0:
                    continue

                # loop through each of the available origins
                for o in origins:
                    # consider boundary conditions (could probably be sped up: if origin 0 and o = 1 didn't find anyone, don't check o = 0 against origins 1 and 2)
                    if o == origin:
                        #condition = mt_origins == o
                        condition = mt_origin_edge & 2**(o + 4) > 0
                    elif origin == 0 and o == 1:
                        #condition = (mt_origins == o) & (mt_cond_edge & 1 > 0)
                        condition = (mt_origin_edge & 2**(o + 4) >
                                     0) & (mt_origin_edge & 1 > 0)
                    elif origin == 0 and o == 2:
                        #condition = (mt_origins == o) & (mt_cond_edge & 2 > 0)
                        condition = (mt_origin_edge & 2**(o + 4) >
                                     0) & (mt_origin_edge & 2 > 0)
                    elif origin == 1 and o == 0:
                        #condition = (mt_origins == o) & (mt_cond_edge & 4 > 0)
                        condition = (mt_origin_edge & 2**(o + 4) >
                                     0) & (mt_origin_edge & 4 > 0)
                    elif origin == 2 and o == 0:
                        #condition = (mt_origins == o) & (mt_cond_edge & 8 > 0)
                        condition = (mt_origin_edge & 2**(o + 4) >
                                     0) & (mt_origin_edge & 8 > 0)
                    elif origin == 1 and o == 2:
                        continue
                    elif origin == 2 and o == 1:
                        continue
                    condition &= cond_dist
                    if np.sum(condition) == 0:
                        print("skipped", origin, o)
                        continue

                    print("origin and o, percentage of particles = ", origin,
                          o,
                          np.sum(condition) / len(condition))

                    # match the pids in the merger trees and the light cones selected by the above conditions
                    '''
                    # og I think this is slower than what is below
                    inds_mt_pid = np.arange(len(mt_pid))[condition]
                    mt_in_lc = match(mt_pid[inds_mt_pid], lc_pid, arr2_sorted=True) #, arr2_index=i_sort_lc_pid) # commented out to spare time
                    comm2 = mt_in_lc[mt_in_lc > -1]
                    comm1 = (np.arange(len(mt_pid), dtype=np.int32)[condition])[mt_in_lc > -1]
                    del condition
                    gc.collect()
                    del mt_in_lc
                    gc.collect()
                    '''

                    # match merger tree and light cone pids
                    print("starting")
                    t1 = time.time()
                    comm1, comm2 = match_srt(
                        mt_pid[condition], lc_pid, condition
                    )  # can be sped up because mt_pid[condition] creates a copy, and same below
                    del condition
                    gc.collect()
                    print("time = ", time.time() - t1)

                    # select the intersected positions and velocities
                    pos_mt_lc, vel_mt_lc = unpack_rvint(lc_rv[comm2],
                                                        boxsize=Lbox)
                    del comm2
                    gc.collect()

                    # select the intersected pids
                    pid_mt_lc = mt_pid[comm1]

                    # print percentage of matched pids
                    print("at z = %.3f, matched = " % mt_z,
                          len(comm1) * 100. / (len(mt_pid)))
                    # original version end
                    '''
                    # alternative Lehman implementation start
                    t1 = time.time()
                    comm1, nmatch, hrvint = match_halo_pids_to_lc_rvint(halo_mt_npout, mt_pid, lc_rv, lc_pid)
                    print("at z = %.3f, matched = "%mt_z,len(hrvint)*100./(len(mt_pid)))
                    print("time = ", time.time()-t1)
                
                    pos_mt_lc, vel_mt_lc = unpack_rvint(hrvint,Lbox)
                    pid_mt_lc = mt_pid[comm1]                
                    # alternative Lehman implementation end
                    '''

                    # offset particle positions depending on which light cone we are at
                    pos_mt_lc += offset_lc

                    # save the pid, position, velocity and redshift
                    lc_table_final['pid'][comm1] = pid_mt_lc
                    lc_table_final['pos'][comm1] = pos_mt_lc
                    lc_table_final['vel'][comm1] = vel_mt_lc
                    #lc_table_final['redshift'][comm1] = np.full_like(pid_mt_lc, z_this, dtype=np.float16)
                    del pid_mt_lc, pos_mt_lc, vel_mt_lc, comm1
                    gc.collect()

                del mt_pid, mt_origin_edge, cond_dist, lc_table_final
                gc.collect()

            print("-------------------")
            del lc_pid, lc_rv
            gc.collect()

    # save and release any catalogs that are still loaded
    for i in range(len(currently_loaded_zs)):

        # save the information about that redshift into an asdf
        save_asdf(currently_loaded_tables[0], "pid_rv_lc",
                  currently_loaded_headers[0],
                  cat_lc_dir / ("z%4.3f" % currently_loaded_znames[0]))
        print("saved catalog = ", currently_loaded_zs[0])

        # record to the log file
        with open(cat_lc_dir / "tmp" / "match.log", "a") as f:
            f.writelines([
                "# Last saved redshift: \n",
                "z_last = %.8f \n" % currently_loaded_zs[0]
            ])

        # discard the first instance from the currently loaded lists of things
        currently_loaded_zs = currently_loaded_zs[1:]
        currently_loaded_znames = currently_loaded_znames[1:]
        currently_loaded_headers = currently_loaded_headers[1:]
        currently_loaded_pids = currently_loaded_pids[1:]
        currently_loaded_origin_edge = currently_loaded_origin_edge[1:]
        currently_loaded_npouts = currently_loaded_npouts[1:]
        currently_loaded_tables = currently_loaded_tables[1:]
        gc.collect()
Exemplo n.º 7
0
def main(sim_name, z_start, z_stop, compaso_parent, catalog_parent, resume=False, plot=False):
    
    # TODO: copy halo info (just get rid of fields=fields_cat);  velocity interpolation (could be done when velocities are summoned, maybe don't interpolate); delete things properly; read parameters from header;

    # turn directories into Path objects
    compaso_parent = Path(compaso_parent)
    catalog_parent = Path(catalog_parent)

    # directory where the CompaSO halo catalogs are saved
    cat_dir = compaso_parent / sim_name / "halos"

    # directory where we save the final outputs
    cat_lc_dir = catalog_parent / sim_name / "halos_light_cones"
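    # note: merger_dir (the merger tree directory for this simulation) is used below
    # but is not set in this snippet; presumably it is defined as in the previous
    # examples, i.e. as merger_parent / sim_name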

    # more accurate, slightly slower
    if not os.path.exists("data/zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        np.save("data/zs_mt.npy", zs_mt)
    zs_mt = np.load("data/zs_mt.npy")

    # number of chunks
    n_chunks = len(list(merger_dir.glob("associations_z%4.3f.*.asdf"%zs_mt[0])))
    print("number of chunks = ",n_chunks)

    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done (or make sure presaved can be matched with simulation)
    if not os.path.exists("data_headers/coord_dist.npy") or not os.path.exists("data_headers/redshifts.npy"):
        zs_all, steps, chis_all = get_lc_info("all_headers")
        np.save("data_headers/redshifts.npy", zs_all)
        np.save("data_headers/coord_dist.npy", chis_all)
    zs_all = np.load("data_headers/redshifts.npy")
    chis_all = np.load("data_headers/coord_dist.npy")
    zs_all[-1] = float("%.1f" % zs_all[-1])  # LHG: I guess this is trying to match up to some filename or something?

    # get functions relating chi and z
    chi_of_z = interp1d(zs_all,chis_all)
    z_of_chi = interp1d(chis_all,zs_all)

    # fields to extract from the CompaSO catalogs
    fields_cat = ['id','npstartA','npoutA','N','x_L2com','v_L2com','sigmav3d_L2com']

    # obtain the redshifts of the CompaSO catalogs
    redshifts = glob.glob(os.path.join(cat_dir,"z*"))
    zs_cat = [extract_redshift(redshifts[i]) for i in range(len(redshifts))]
    print(zs_cat)

    # initial redshift where we start building the trees
    ind_start = np.argmin(np.abs(zs_mt-z_start))
    ind_stop = np.argmin(np.abs(zs_mt-z_stop))

    # loop over each merger tree redshift
    for i in range(ind_start,ind_stop+1):
        # TODO: loop over origins (o) and chunks here
        
        # starting snapshot
        z_in = zs_mt[i]
        z_cat = zs_cat[np.argmin(np.abs(z_in-zs_cat))]
        print("Redshift = %.3f %.3f"%(z_in,z_cat))

        # load the light cone arrays  # TODO
        table_lc = np.load(os.path.join(cat_lc_dir,"z%.3f"%z_in,'table_lc.npy'))
        halo_ind_lc = table_lc['halo_ind']
        pos_interp_lc = table_lc['pos_interp']
        vel_interp_lc = table_lc['vel_interp']
        chi_interp_lc = table_lc['chi_interp']

        # catalog directory  # TODO
        catdir = cat_dir / ("z%.3f" % z_cat)

        # load halo catalog, setting unpack to False for speed
        cat = CompaSOHaloCatalog(catdir, load_subsamples='A_halo_pid', fields=fields_cat, unpack_bits = False)

        # in the event that we have more than one copy of the box, make sure the halo index is still within N_halo
        halo_ind_lc %= len(cat.halos)

        # halo catalog
        halo_table = cat.halos[halo_ind_lc]
        header = cat.header
        N_halos = len(cat.halos)
        print("N_halos = ",N_halos)

        # load the pid, set unpack_bits to True if you want other information
        pid = cat.subsamples['pid']
        npstart = halo_table['npstartA']
        npout = halo_table['npoutA']
        pid_new, npstart_new, npout_new = reindex_particles(pid,npstart,npout)        
        pid_table = np.empty(len(pid_new),dtype=[('pid',pid_new.dtype)])
        pid_table['pid'] = pid_new
        halo_table['npstartA'] = npstart_new
        halo_table['npoutA'] = npout_new
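        # reindex_particles presumably gathers each halo's A-subsample pids into a
        # contiguous array and returns updated npstartA/npoutA indices into it, so
        # that the saved pid_table and halo_table stay consistent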

        # isolate halos that did not have interpolation and get the velocity from the halo info files
        not_interp = np.sum(np.abs(vel_interp_lc), axis=1) < 1.e-6
        vel_interp_lc[not_interp] = halo_table['v_L2com'][not_interp]
        print("percentage not interpolated = ", 100.*np.sum(not_interp)/len(not_interp))

        # append new fields
        halo_table['index_halo'] = halo_ind_lc
        halo_table['pos_interp'] = pos_interp_lc
        halo_table['vel_interp'] = vel_interp_lc
        halo_table['redshift_interp'] = z_of_chi(chi_interp_lc)

        # save to files
        save_asdf(halo_table, "halo_info_lc", header, cat_lc_dir / ("z%4.3f" % z_in))
        save_asdf(pid_table, "pid_lc", header, cat_lc_dir / ("z%4.3f" % z_in))

        # delete things at the end
        del pid, pid_new, pid_table, npstart, npout, npstart_new, npout_new
        del halo_table
        del cat

        gc.collect()