Example #1
def test_unpack_bits():
    '''Test unpack_bits
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog
    from abacusnbody.data.bitpacked import PID_FIELDS

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=True,
                             unpack_bits=True,
                             fields='N')
    assert set(PID_FIELDS) <= set(cat.subsamples.colnames)  # check subset

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=True,
                             unpack_bits='density',
                             fields='N')
    assert 'density' in cat.subsamples.colnames
    assert 'lagr_pos' not in cat.subsamples.colnames  # only the requested bit field should be unpacked

    # bad bits field name
    with pytest.raises(ValueError):
        cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                                 subsamples=True,
                                 unpack_bits=['blah'],
                                 fields='N')
Example #2
def test_subsamples_clean(tmp_path):
    '''Test loading particle subsamples from a cleaned catalog
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=True,
                             fields='all',
                             cleaned=True)

    # to regenerate reference
    #ref = cat.subsamples
    #import asdf; asdf.compression.set_compression_options(typesize='auto')
    #ref.write(PARTICLES_OUTPUT_CLEAN, format='asdf', all_array_storage='internal', all_array_compression='blsc')

    ref = Table.read(PARTICLES_OUTPUT_CLEAN)

    ss = cat.subsamples
    for col in ref.colnames:
        assert check_close(ref[col], ss[col])

    # total number of particles in ref should be equal to the sum total of npout{AB} in EXAMPLE_SIM
    assert len(ref) == np.sum(cat.halos['npoutA']) + np.sum(
        cat.halos['npoutB'])

    assert cat.subsamples.meta == ref.meta
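
These tests compare catalog columns against saved reference tables through a module-level check_close helper that is not shown on this page. A hypothetical sketch of such a helper (an assumption, not the test suite's actual implementation) could be:

import numpy as np

def check_close(a, b, rtol=1e-6, atol=0.0):
    '''Hypothetical stand-in for the test suite's check_close helper:
    exact comparison for integer columns, tolerance-based otherwise.'''
    a = np.asarray(a)
    b = np.asarray(b)
    if a.dtype.kind in 'iu' and b.dtype.kind in 'iu':
        return np.array_equal(a, b)
    return np.allclose(a, b, rtol=rtol, atol=atol, equal_nan=True)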
Example #3
def test_field_subset_loading():
    '''Test loading a subset of halo catalog columns
    '''
    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(os.path.join(EXAMPLE_SIM, 'halos', 'z0.000'), fields=['N','x_com'])
    assert set(cat.halos.colnames) == set(['N','x_com'])
Example #4
def test_halo_lc():
    '''Test loading halo light cones
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(
        curdir /
        'halo_light_cones/AbacusSummit_base_c000_ph001-abridged/z2.250/',
        fields='all',
        subsamples=True)
    assert cat.halo_lc

    HALO_LC_CAT = refdir / 'halo_lc_cat.asdf'
    HALO_LC_SUBSAMPLES = refdir / 'halo_lc_subsample.asdf'

    # generate reference
    #ref = cat.halos
    #ref.write(HALO_LC_CAT, format='asdf', all_array_storage='internal', all_array_compression='blsc')

    #ref = cat.subsamples
    #ref.write(HALO_LC_SUBSAMPLES, format='asdf', all_array_storage='internal', all_array_compression='blsc')

    ref = Table.read(HALO_LC_CAT)
    halos = cat.halos
    for col in ref.colnames:
        assert check_close(ref[col], halos[col])
    assert halos.meta == ref.meta

    ref = Table.read(HALO_LC_SUBSAMPLES)
    ss = cat.subsamples
    for col in ref.colnames:
        assert check_close(ref[col], ss[col])

    assert ss.meta == ref.meta
Example #5
def test_halos_clean(tmp_path):
    '''Test loading a cleaned halo catalog
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=True,
                             fields='all',
                             cleaned=True)

    # to regenerate reference
    #ref = cat.halos
    #ref.write(HALOS_OUTPUT_CLEAN, all_array_storage='internal', all_array_compression='blsc')

    ref = Table.read(HALOS_OUTPUT_CLEAN)

    halos = cat.halos
    for col in ref.colnames:
        assert check_close(ref[col], halos[col])

    # all haloindex values should point to this slab
    assert np.all((halos['haloindex'] /
                   1e12).astype(int) == cat.header['FullStepNumber'])
    # ensure that all deleted halos in ref are marked as merged in EXAMPLE_SIM
    assert np.all(halos['is_merged_to'][ref['N'] == 0] != -1)
    # no deleted halos in ref should have merged particles in EXAMPLE_SIM
    assert np.all(halos['N_merge'][ref['N'] == 0] == 0)

    assert halos.meta == ref.meta
Example #6
def test_one_halo_info():
    '''Test loading a single halo_info file
    '''
    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(os.path.join(EXAMPLE_SIM, 'halos', 'z0.000', 'halo_info', 'halo_info_000.asdf'),
        load_subsamples=True)
    assert len(cat.halos) == 127
    assert len(cat.subsamples) == 9306
Example #7
def test_one_halo_info():
    '''Test loading a single halo_info file
    '''
    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000' / 'halo_info' /
                             'halo_info_000.asdf',
                             subsamples=True)
    assert len(cat.halos) == 127
    assert len(cat.subsamples) == 3209  #9306
Example #8
def test_halo_info_list():
    '''Test list of halo infos
    '''
    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog([
        EXAMPLE_SIM / 'halos' / 'z0.000' / 'halo_info' / 'halo_info_000.asdf',
        EXAMPLE_SIM / 'halos' / 'z0.000' / 'halo_info' / 'halo_info_001.asdf'
    ],
                             subsamples=True)
    assert len(cat.halos) == 281
    assert len(cat.subsamples) == 6900  #19555

    # check fail on dups
    with pytest.raises(ValueError):
        cat = CompaSOHaloCatalog([
            EXAMPLE_SIM / 'halos' / 'z0.000' / 'halo_info' /
            'halo_info_000.asdf', EXAMPLE_SIM / 'halos' / 'z0.000' /
            'halo_info' / 'halo_info_000.asdf'
        ])
Example #9
def test_subsamples_unclean(tmp_path):
    '''Test loading particle subsamples from an uncleaned catalog
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=dict(A=True),
                             fields='all',
                             cleaned=False)
    lenA = len(cat.subsamples)
    assert lenA == 2975
    assert cat.subsamples.colnames == ['pos', 'vel']

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=dict(B=True),
                             fields='all',
                             cleaned=False)
    lenB = len(cat.subsamples)
    assert lenB == 7082

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=True,
                             fields='all',
                             cleaned=False)

    assert len(cat.subsamples) == lenA + lenB

    # to regenerate reference
    #ref = cat.subsamples
    #import asdf; asdf.compression.set_compression_options(typesize='auto')
    #ref.write(PARTICLES_OUTPUT_UNCLEAN, format='asdf', all_array_storage='internal', all_array_compression='blsc')

    ref = Table.read(PARTICLES_OUTPUT_UNCLEAN)

    ss = cat.subsamples
    for col in ref.colnames:
        assert check_close(ref[col], ss[col])

    assert cat.subsamples.meta == ref.meta
Example #10
def test_halos(tmp_path):
    '''Test loading a halo catalog
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(os.path.join(EXAMPLE_SIM, 'halos', 'z0.000'), load_subsamples=True, fields='all')

    with open(tmp_path/'halos_test.txt', 'w') as fp:
        f = cat.halos[::5].pformat_all()
        fp.write('\n'.join(f))

    assert filecmp.cmp(HALOS_OUTPUT,tmp_path/'halos_test.txt')
Example #11
def get_smo_density_oneslab(i, simdir, simname, z_mock, N_dim):
    slabname = (simdir + simname + '/halos/z' + str(z_mock).ljust(5, '0')
                + '/halo_info/halo_info_' + str(i).zfill(3) + '.asdf')
    cat = CompaSOHaloCatalog(slabname, fields=['N', 'x_L2com'])
    Lbox = cat.header['BoxSizeHMpc']
    halos = cat.halos

    # total number of objects
    N_g = np.sum(halos['N'])
    # get a 3d histogram with number of objects in each cell
    D, edges = np.histogramdd(halos['x_L2com'], weights=halos['N'], bins=N_dim,
                              range=[[-Lbox/2, Lbox/2], [-Lbox/2, Lbox/2], [-Lbox/2, Lbox/2]])
    return D
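
A minimal driver for the slab helper above might look like the sketch below; the simulation path, name, redshift and slab count are placeholders, and the final Gaussian smoothing step is only an assumption about what the "smo" (smoothed) in the name refers to.

import numpy as np
from scipy.ndimage import gaussian_filter

N_dim = 256
n_slabs = 34  # placeholder: set to the number of halo_info_*.asdf files
D_total = np.zeros((N_dim, N_dim, N_dim))
for i in range(n_slabs):
    # accumulate the particle-weighted halo counts of each slab
    D_total += get_smo_density_oneslab(i, '/path/to/sims/',
                                       'AbacusSummit_base_c000_ph000',
                                       0.5, N_dim)

# convert counts to overdensity and smooth on a few-cell scale
delta = D_total / np.mean(D_total) - 1.0
delta_smooth = gaussian_filter(delta, sigma=2.0)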
Example #12
def test_filter_func():
    '''Test CHC filter_func
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             fields=['N', 'x_L2com'],
                             filter_func=lambda c: c['N'] > 100,
                             subsamples=True)
    assert (cat.halos['N'] > 100).all()
    assert len(cat.halos) == 146
    assert len(cat.subsamples) == 7193
Example #13
def test_halo_info_list():
    '''Test list of halo infos
    '''
    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog([
        os.path.join(EXAMPLE_SIM, 'halos', 'z0.000', 'halo_info',
                     'halo_info_000.asdf'),
        os.path.join(EXAMPLE_SIM, 'halos', 'z0.000', 'halo_info',
                     'halo_info_001.asdf')
    ],
                             load_subsamples=True)
    assert len(cat.halos) == 281
    assert len(cat.subsamples) == 19555

    # check fail on dups
    with pytest.raises(ValueError):
        cat = CompaSOHaloCatalog([
            os.path.join(EXAMPLE_SIM, 'halos', 'z0.000', 'halo_info',
                         'halo_info_000.asdf'),
            os.path.join(EXAMPLE_SIM, 'halos', 'z0.000', 'halo_info',
                         'halo_info_000.asdf')
        ])
Example #14
def prepare_cat(halo_cat_path, ndens):
    '''Load and downsample the cat
    '''
    # TODO: could use way less memory loading slab-by-slab
    cat = CompaSOHaloCatalog(halo_cat_path,
                             subsamples=False,
                             fields=('N', 'x_L2com'),
                             cleaned=False  # TODO
                            )
    log(f'Loading cat used {cat.nbytes()/1e9:.3g} GB')
    # Determine number of objects
    box = cat.header['BoxSize']
    N_select = int(box**3 * ndens)
    log(f'Selecting {N_select} objects')
    assert N_select > 0
    
    # Downsample catalog to N most massive
    iord = np.argsort(cat.halos['N'])[::-1]
    cat.halos = cat.halos[iord[:N_select]]
    del iord
    gc.collect()  # maybe can drop some memory
    
    return cat
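
A hypothetical call to prepare_cat, with a placeholder catalog path and target number density, could look like this.

# placeholder path and number density (objects per (Mpc/h)^3)
cat = prepare_cat('/path/to/AbacusSummit_base_c000_ph000/halos/z0.500',
                  ndens=3.5e-4)
print(f'{len(cat.halos)} most massive halos kept')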
Example #15
def get_smo_density_oneslab(i, simdir, simname, z_mock, N_dim, cleaning):
    slabname = (simdir + simname + '/halos/z' + str(z_mock).ljust(5, '0')
                + '/halo_info/halo_info_' + str(i).zfill(3) + '.asdf')

    cat = CompaSOHaloCatalog(slabname, fields=['N', 'x_L2com'],
                             cleaned_halos=cleaning)
    Lbox = cat.header['BoxSizeHMpc']
    halos = cat.halos

    if cleaning:
        halos = halos[halos['N'] > 0]

    # get a 3d histogram with number of objects in each cell
    D, edges = np.histogramdd(halos['x_L2com'], weights=halos['N'], bins=N_dim,
                              range=[[-Lbox/2, Lbox/2], [-Lbox/2, Lbox/2], [-Lbox/2, Lbox/2]])
    return D
Example #16
def test_halos_unclean(tmp_path):
    '''Test loading a base (uncleaned) halo catalog
    '''

    from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog

    cat = CompaSOHaloCatalog(EXAMPLE_SIM / 'halos' / 'z0.000',
                             subsamples=True,
                             fields='all',
                             cleaned=False)

    # to regenerate reference
    #ref = cat.halos
    #ref.write(HALOS_OUTPUT_UNCLEAN, all_array_storage='internal', all_array_compression='blsc')

    ref = Table.read(HALOS_OUTPUT_UNCLEAN)

    halos = cat.halos
    for col in ref.colnames:
        assert check_close(ref[col], halos[col])

    assert halos.meta == ref.meta
Example #17
def prepare_slab(i, savedir, simdir, simname, z_mock, tracer_flags, MT, want_ranks, cleaning, N_dim, newseed, light_cones=False, light_cones_dir=''):
    outfilename_halos = savedir+'/halos_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushod'
    outfilename_particles = savedir+'/particles_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushod'
    print("processing slab ", i)
    if MT:
        outfilename_halos += '_MT'
        outfilename_particles += '_MT'
    if want_ranks:
        outfilename_particles += '_withranks'
    outfilename_particles += '_new.h5'
    outfilename_halos += '_new.h5'

    np.random.seed(newseed + i)
    # if file already exists, just skip
    if os.path.exists(outfilename_halos) \
    and os.path.exists(outfilename_particles):
        return 0


    # load the halo catalog slab
    print("loading halo catalog ")
    if light_cones:
        fields = ['N', 'N_interp', 'x_L2com', 'v_L2com', 'pos_interp', 'vel_interp', 'npstartA', 'npoutA', 'haloindex', 'sigmav3d_L2com']
    else:
        fields = ['N', 'x_L2com', 'v_L2com', 'r90_L2com', 'r25_L2com', 'npstartA', 'npoutA', 'id', 'sigmav3d_L2com']
    
    if light_cones:
        assert light_cones_dir != '', "You haven't specified a light cone directory"
        # halo table filename
        halos_fn = os.path.join(light_cones_dir, 'halo_light_cones', simname, f'z{z_mock:4.3f}', 'lc_halo_info.asdf')
        
        # open the halo file
        with asdf.open(halos_fn, lazy_load=True, copy_arrays=True) as f:
            halos = f['data']
            header = f['header']
            cols = {col:np.array(halos[col]) for col in fields}
        halos = Table(cols, copy=False)

        # rename the columns to agree with the rest of the code
        halos['x_L2com'] = halos['pos_interp']
        halos['v_L2com'] = halos['vel_interp']
        halos['id'] = halos['haloindex']
        #halos['v_L2com'] = halos['vel_avg'] # use averaged particle positions
        halos['N'] = halos['N_interp']
        N_halos = len(halos['N'])
                
        # testing: needs to be changed once we copy all halo fields
        halos['r25_L2com'] = np.ones(N_halos)
        halos['r90_L2com'] = np.ones(N_halos)

        # load the particles
        with asdf.open(os.path.join(light_cones_dir, 'halo_light_cones', simname, f'z{z_mock:4.3f}', 'lc_pid_rv.asdf'), lazy_load=True, copy_arrays=True) as f:
            parts = f['data']
            header = f['header']
            cols = {col:np.array(parts[col]) for col in ['pos', 'vel']}
        parts = Table(cols, copy=False)
    else:
        slabname = simdir+simname+'/halos/z'+str(z_mock).ljust(5, '0')\
        +'/halo_info/halo_info_'+str(i).zfill(3)+'.asdf'

        cat = CompaSOHaloCatalog(slabname, subsamples=dict(A=True, rv=True), fields = fields, 
            cleaned_halos = cleaning)
        halos = cat.halos
        if cleaning:
            halos = halos[halos['N'] > 0]

        parts = cat.subsamples
        header = cat.header
        N_halos = len(halos)  # only the light-cone branch above defines N_halos; set it here too

    Lbox = header['BoxSizeHMpc']
    Mpart = header['ParticleMassHMsun'] # msun / h 
    H0 = header['H0']
    h = H0/100.0

    # form a halo table of the columns I care about
    # create a mask of which halos to keep and which to drop
    p_halos = subsample_halos(halos['N']*Mpart, MT)
    mask_halos = np.random.random(N_halos) < p_halos
    print("total number of halos, ", N_halos, "keeping ", np.sum(mask_halos))

    halos['mask_subsample'] = mask_halos
    halos['multi_halos'] = 1.0 / p_halos

    nbins = 100
    mbins = np.logspace(np.log10(3e10), 15.5, nbins + 1)

    print("computing density rank")
    fenv_rank = np.zeros(N_halos)
    if light_cones:
        print("TBH, I am just lazy, but to do this properly would need to use density maps of the full boxes and figure out wrapping cause the light cones go beyond the box")
    else:
        dens_grid = np.array(h5py.File(savedir+"/density_field.h5", 'r')['dens'])
        ixs = np.floor((np.array(halos['x_L2com']) + Lbox/2) / (Lbox/N_dim)).astype(int) % N_dim
        halos_overdens = dens_grid[ixs[:, 0], ixs[:, 1], ixs[:, 2]]
        
        for ibin in range(nbins):
            mmask = (halos['N']*Mpart > mbins[ibin]) & (halos['N']*Mpart < mbins[ibin + 1])
            if np.sum(mmask) > 0:
                if np.sum(mmask) == 1:
                    fenv_rank[mmask] = 0
                else:
                    new_fenv_rank = halos_overdens[mmask].argsort().argsort()
                    fenv_rank[mmask] = new_fenv_rank / np.max(new_fenv_rank) - 0.5
    halos['fenv_rank'] = fenv_rank

    # compute delta concentration
    print("computing c rank")
    halos_c = halos['r90_L2com']/halos['r25_L2com']
    deltac_rank = np.zeros(N_halos)
    #if light_cones:
    #    print("Concentration not implemented!")
    if True:#else:
        for ibin in range(nbins):
            mmask = (halos['N']*Mpart > mbins[ibin]) & (halos['N']*Mpart < mbins[ibin + 1])
            if np.sum(mmask) > 0:
                if np.sum(mmask) == 1:
                    deltac_rank[mmask] = 0
                else:
                    new_deltac = halos_c[mmask] - np.median(halos_c[mmask])
                    new_deltac_rank = new_deltac.argsort().argsort()
                    deltac_rank[mmask] = new_deltac_rank / np.max(new_deltac_rank) - 0.5
    halos['deltac_rank'] = deltac_rank

    # the new particle start, len, and multiplier
    halos_pstart = halos['npstartA']
    halos_pnum = halos['npoutA']
    #halos_pstart = np.zeros(len(halos_pnum), dtype=int)
    #halos_pstart[1:] = np.cumsum(halos_pnum)[:-1]
    halos_pstart_new = np.zeros(N_halos)
    halos_pnum_new = np.zeros(N_halos)

    # particle arrays for ranks and mask
    N_parts = parts['vel'][:].shape[0]
    mask_parts = np.zeros(N_parts)
    len_old = N_parts
    ranks_parts = np.full(len_old, -1.0)
    ranksv_parts = np.full(len_old, -1.0)
    ranksr_parts = np.full(len_old, -1.0)
    ranksp_parts = np.full(len_old, -1.0)
    pos_parts = np.full((len_old, 3), -1.0)
    vel_parts = np.full((len_old, 3), -1.0)
    hvel_parts = np.full((len_old, 3), -1.0)
    Mh_parts = np.full(len_old, -1.0)
    Np_parts = np.full(len_old, -1.0)
    downsample_parts = np.full(len_old, -1.0)
    idh_parts = np.full(len_old, -1)
    deltach_parts = np.full(len_old, -1.0)
    fenvh_parts = np.full(len_old, -1.0)

    print("compiling particle subsamples")
    start_tracker = 0
    for j in np.arange(N_halos):
        if j % 10000 == 0:
            print("halo id", j, end = '\r')
        if mask_halos[j]:
            # updating the mask tagging the particles we want to preserve
            subsample_factor = subsample_particles(halos['N'][j] * Mpart, MT)
            submask = np.random.binomial(n = 1, p = subsample_factor, size = halos_pnum[j])
            # updating the particles' masks, downsample factors, halo mass
            mask_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = submask
            # print(j, halos_pstart, halos_pnum, p_halos, downsample_parts)
            downsample_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = p_halos[j]
            hvel_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = halos['v_L2com'][j]
            Mh_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = halos['N'][j] * Mpart # in msun / h
            Np_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = np.sum(submask)
            idh_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = halos['id'][j] 
            deltach_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = deltac_rank[j]
            fenvh_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = fenv_rank[j]

            # updating the pstart, pnum, for the halos
            halos_pstart_new[j] = start_tracker
            halos_pnum_new[j] = np.sum(submask)
            start_tracker += np.sum(submask)

            if want_ranks:
                if np.sum(submask) == 0:
                    continue
                # extract particle index
                indices_parts = np.arange(
                    halos_pstart[j], halos_pstart[j] + halos_pnum[j])[submask.astype(bool)]
                indices_parts = indices_parts.astype(int)
                if np.sum(submask) == 1:
                    ranks_parts[indices_parts] = 0
                    ranksv_parts[indices_parts] = 0
                    ranksp_parts[indices_parts] = 0
                    ranksr_parts[indices_parts] = 0
                    continue
                
                # make the rankings
                theseparts = parts[
                    halos_pstart[j]: halos_pstart[j] + halos_pnum[j]][submask.astype(bool)]
                theseparts_pos = theseparts['pos']
                theseparts_vel = theseparts['vel']
                theseparts_halo_pos = halos['x_L2com'][j]
                theseparts_halo_vel = halos['v_L2com'][j]

                dist2_rel = np.sum((theseparts_pos - theseparts_halo_pos)**2, axis = 1)
                newranks = dist2_rel.argsort().argsort() 
                ranks_parts[indices_parts] = (newranks - np.mean(newranks)) / np.mean(newranks)

                v2_rel = np.sum((theseparts_vel - theseparts_halo_vel)**2, axis = 1)
                newranksv = v2_rel.argsort().argsort() 
                ranksv_parts[indices_parts] = (newranksv - np.mean(newranksv)) / np.mean(newranksv)

                # get rps
                # calc relative positions
                r_rel = theseparts_pos - theseparts_halo_pos 
                r0 = np.sqrt(np.sum(r_rel**2, axis = 1))
                r_rel_norm = r_rel/r0[:, None]

                # list of peculiar velocities of the particles
                vels_rel = theseparts_vel - theseparts_halo_vel # velocity km/s
                # relative speed to halo center squared
                v_rel2 = np.sum(vels_rel**2, axis = 1) 

                # calculate radial and tangential peculiar velocity
                vel_rad = np.sum(vels_rel*r_rel_norm, axis = 1)
                newranksr = vel_rad.argsort().argsort() 
                ranksr_parts[indices_parts] = (newranksr - np.mean(newranksr)) / np.mean(newranksr)

                # radial component
                v_rad2 = vel_rad**2 # speed
                # tangential component
                v_tan2 = v_rel2 - v_rad2

                # compute the perihelion distance for NFW profile
                m = halos['N'][j]*Mpart / h # in Msun (converted to kg below via the 2e30 factor)
                rs = halos['r25_L2com'][j]
                c = halos['r90_L2com'][j]/rs
                r0_kpc = r0*1000 # kpc
                alpha = 1.0/(np.log(1+c)-c/(1+c))*2*6.67e-11*m*2e30/r0_kpc/3.086e+19/1e6

                # iterate a few times to solve for rp
                x2 = v_tan2/(v_tan2+v_rad2)

                num_iters = 20 # how many iterations do we want
                factorA = v_tan2 + v_rad2
                factorB = np.log(1+r0_kpc/rs)
                for it in range(num_iters):
                    oldx = np.sqrt(x2)
                    x2 = v_tan2/(factorA + alpha*(np.log(1+oldx*r0_kpc/rs)/oldx - factorB))
                x2[np.isnan(x2)] = 1
                # final perihelion distance 
                rp2 = r0_kpc**2*x2
                newranksp = rp2.argsort().argsort() 
                ranksp_parts[indices_parts] = (newranksp - np.mean(newranksp)) / np.mean(newranksp)

        else:
            halos_pstart_new[j] = -1
            halos_pnum_new[j] = -1

    halos['npstartA'] = halos_pstart_new
    halos['npoutA'] = halos_pnum_new
    halos['randoms'] = np.random.random(N_halos) # attaching random numbers
    halos['randoms_gaus_vrms'] = np.random.normal(loc = 0, 
        scale = halos["sigmav3d_L2com"]/np.sqrt(3), size = N_halos) # attaching random numbers

    # output halo file 
    print("outputting new halo file ")
    # output_dir = savedir+'/halos_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushodMT_new.h5'
    if os.path.exists(outfilename_halos):
        os.remove(outfilename_halos)
    print(outfilename_halos, outfilename_particles)
    newfile = h5py.File(outfilename_halos, 'w')

    if light_cones:
        halos = Table(halos)
        parts = Table(parts)
    
    dataset = newfile.create_dataset('halos', data = halos[mask_halos])
    newfile.close()

    # output the new particle file
    print("adding rank fields to particle data ")
    mask_parts = mask_parts.astype(bool)
    parts = parts[mask_parts]
    N_parts = parts['vel'][:].shape[0]
    print("pre process particle number ", len_old, " post process particle number ", N_parts)
    if want_ranks:
        parts['ranks'] = ranks_parts[mask_parts]
        parts['ranksv'] = ranksv_parts[mask_parts]
        parts['ranksr'] = ranksr_parts[mask_parts]
        parts['ranksp'] = ranksp_parts[mask_parts]
    parts['downsample_halo'] = downsample_parts[mask_parts]
    parts['halo_vel'] = hvel_parts[mask_parts]
    parts['halo_mass'] = Mh_parts[mask_parts]
    parts['Np'] = Np_parts[mask_parts]
    parts['halo_id'] = idh_parts[mask_parts]
    parts['randoms'] = np.random.random(N_parts)
    parts['halo_deltac'] = deltach_parts[mask_parts]
    parts['halo_fenv'] = fenvh_parts[mask_parts]
    
    print("are there any negative particle values? ", np.sum(parts['downsample_halo'] < 0), 
        np.sum(parts['halo_mass'] < 0))
    print("outputting new particle file ")
    # output_dir = savedir+'/particles_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushodMT_new.h5'
    if os.path.exists(outfilename_particles):
        os.remove(outfilename_particles)
    newfile = h5py.File(outfilename_particles, 'w')
    dataset = newfile.create_dataset('particles', data = parts)
    newfile.close()

    print("pre process particle number ", len_old, " post process particle number ", N_parts)
Example #18
def prepare_slab(i,
                 savedir,
                 simdir,
                 simname,
                 z_mock,
                 tracer_flags,
                 MT,
                 want_ranks,
                 want_AB,
                 cleaning,
                 newseed,
                 halo_lc=False,
                 nthread=1,
                 overwrite=1,
                 mcut=1e11,
                 rad_outer=5.):
    outfilename_halos = savedir + '/halos_xcom_' + str(i) + '_seed' + str(
        newseed) + '_abacushod_oldfenv'
    outfilename_particles = savedir + '/particles_xcom_' + str(
        i) + '_seed' + str(newseed) + '_abacushod_oldfenv'
    print("processing slab ", i)
    if MT:
        outfilename_halos += '_MT'
        outfilename_particles += '_MT'
    if want_ranks:
        outfilename_particles += '_withranks'
    outfilename_particles += '_new.h5'
    outfilename_halos += '_new.h5'

    np.random.seed(newseed + i)
    # if file already exists, just skip
    overwrite = int(overwrite)
    if (not overwrite) and (os.path.exists(outfilename_halos)) \
    and (os.path.exists(outfilename_particles)):
        print("files exists, skipping ", i)
        return 0

    # load the halo catalog slab
    print("loading halo catalog ")
    if halo_lc:
        slabname = simdir + '/' + simname + '/z' + str(z_mock).ljust(
            5, '0') + '/lc_halo_info.asdf'
        id_key = 'index_halo'
        pos_key = 'pos_interp'
        vel_key = 'vel_interp'
        N_key = 'N_interp'
    else:
        slabname = simdir+'/'+simname+'/halos/z'+str(z_mock).ljust(5, '0')\
                   +'/halo_info/halo_info_'+str(i).zfill(3)+'.asdf'
        id_key = 'id'
        pos_key = 'x_L2com'
        vel_key = 'v_L2com'
        N_key = 'N'

    cat = CompaSOHaloCatalog(slabname,
                             subsamples=dict(A=True, rv=True),
                             fields=[
                                 N_key, pos_key, vel_key, 'r90_L2com',
                                 'r25_L2com', 'r98_L2com', 'npstartA',
                                 'npoutA', id_key, 'sigmav3d_L2com'
                             ],
                             cleaned=cleaning)
    assert halo_lc == cat.halo_lc

    halos = cat.halos
    if halo_lc:
        halos['id'] = halos[id_key]
        halos['x_L2com'] = halos[pos_key]
        halos['v_L2com'] = halos[vel_key]
        halos['N'] = halos[N_key]
    if cleaning:
        halos = halos[halos['N'] > 0]

    parts = cat.subsamples
    header = cat.header
    Lbox = cat.header['BoxSizeHMpc']
    Mpart = header['ParticleMassHMsun']  # msun / h
    H0 = header['H0']
    h = H0 / 100.0

    # form a halo table of the columns I care about
    # create a mask of which halos to keep and which to drop
    p_halos = subsample_halos(halos['N'] * Mpart, MT)
    mask_halos = np.random.random(len(halos)) < p_halos
    print("total number of halos, ", len(halos), "keeping ",
          np.sum(mask_halos))

    halos['mask_subsample'] = mask_halos
    halos['multi_halos'] = 1.0 / p_halos

    # only generate fenv ranks and c ranks if the user wants to enable secondary biases
    if want_AB:
        nbins = 100
        mbins = np.logspace(np.log10(mcut), 15.5, nbins + 1)

        # # grid based environment calculation
        # dens_grid = np.array(h5py.File(savedir+"/density_field.h5", 'r')['dens'])
        # ixs = np.floor((np.array(halos['x_L2com']) + Lbox/2) / (Lbox/N_dim)).astype(np.int) % N_dim
        # halos_overdens = dens_grid[ixs[:, 0], ixs[:, 1], ixs[:, 2]]
        # fenv_rank = np.zeros(len(halos))
        # for ibin in range(nbins):
        #     mmask = (halos['N']*Mpart > mbins[ibin]) & (halos['N']*Mpart < mbins[ibin + 1])
        #     if np.sum(mmask) > 0:
        #         if np.sum(mmask) == 1:
        #             fenv_rank[mmask] = 0
        #         else:
        #             new_fenv_rank = halos_overdens[mmask].argsort().argsort()
        #             fenv_rank[mmask] = new_fenv_rank / np.max(new_fenv_rank) - 0.5
        # halos['fenv_rank'] = fenv_rank

        allpos = halos['x_L2com']
        allmasses = halos['N'] * Mpart

        if halo_lc:
            # origin dependent and simulation dependent
            origins = np.array(header['LightConeOrigins']).reshape(-1, 3)
            alldist = np.sqrt(np.sum((allpos - origins[0])**2., axis=1))
            offset = 10.  # offset intrinsic to light cones catalogs (removing edges +/- 10 Mpc/h from the sides of the box)

            r_min = alldist.min()
            r_max = alldist.max()
            x_min_edge = -(Lbox / 2. - offset - rad_outer)
            y_min_edge = -(Lbox / 2. - offset - rad_outer)
            z_min_edge = -(Lbox / 2. - offset - rad_outer)
            x_max_edge = Lbox / 2. - offset - rad_outer
            r_min_edge = alldist.min() + rad_outer
            r_max_edge = alldist.max() - rad_outer
            if origins.shape[0] == 1:  # true only for the huge box, where the origin is at the center
                y_max_edge = Lbox / 2. - offset - rad_outer
                z_max_edge = Lbox / 2. - offset - rad_outer
            else:
                y_max_edge = 3. / 2 * Lbox - rad_outer
                z_max_edge = 3. / 2 * Lbox - rad_outer

            bounds_edge = ((x_min_edge <= allpos[:, 0]) &
                           (x_max_edge >= allpos[:, 0]) &
                           (y_min_edge <= allpos[:, 1]) &
                           (y_max_edge >= allpos[:, 1]) &
                           (z_min_edge <= allpos[:, 2]) &
                           (z_max_edge >= allpos[:, 2]) &
                           (r_min_edge <= alldist) & (r_max_edge >= alldist))
            index_bounds = np.arange(allpos.shape[0], dtype=int)[~bounds_edge]
            del bounds_edge, alldist

            if len(index_bounds) > 0:
                # factor of rands to generate
                rand = 10
                rand_N = allpos.shape[0] * rand

                # generate randoms in L shape
                randpos, randdist = gen_rand(allpos.shape[0], r_min, r_max,
                                             rand, Lbox, offset, origins)
                rand_n = rand_N / (4. / 3. * np.pi * (r_max**3 - r_min**3))

                # boundaries of the random particles for cutting
                randbounds_edge = ((x_min_edge <= randpos[:, 0]) &
                                   (x_max_edge >= randpos[:, 0]) &
                                   (y_min_edge <= randpos[:, 1]) &
                                   (y_max_edge >= randpos[:, 1]) &
                                   (z_min_edge <= randpos[:, 2]) &
                                   (z_max_edge >= randpos[:, 2]) &
                                   (r_min_edge <= randdist) &
                                   (r_max_edge >= randdist))
                randpos = randpos[~randbounds_edge]
                del randbounds_edge, randdist

                if randpos.shape[0] > 0:
                    # random points on the edges
                    rand_N = randpos.shape[0]
                    randpos_tree = cKDTree(randpos)
                    randinds_inner = randpos_tree.query_ball_point(
                        allpos[index_bounds],
                        r=halos['r98_L2com'][index_bounds],
                        n_jobs=nthread)
                    randinds_outer = randpos_tree.query_ball_point(
                        allpos[index_bounds], r=rad_outer, n_jobs=nthread)
                    rand_norm = np.zeros(len(index_bounds))
                    for ind in np.arange(len(index_bounds)):
                        rand_norm[ind] = (len(randinds_outer[ind]) -
                                          len(randinds_inner[ind]))
                    rand_norm /= (
                        (rad_outer**3. - halos['r98_L2com'][index_bounds]**3.)
                        * 4. / 3. * np.pi * rand_n)  # expected number
                else:
                    rand_norm = np.ones(len(index_bounds))

        Menv = do_Menv_from_tree(allpos,
                                 allmasses,
                                 r_inner=halos['r98_L2com'],
                                 r_outer=rad_outer,
                                 halo_lc=halo_lc,
                                 Lbox=Lbox,
                                 nthread=nthread,
                                 mcut=mcut)
        gc.collect()

        # Menv = np.array([np.sum(allmasses[allinds_outer[ind]]) - np.sum(allmasses[allinds_inner[ind]]) \
        #     for ind in np.arange(len(halos))])
        # Menv = calc_Menv(allmasses, allinds_outer, allinds_inner)

        if halo_lc and len(index_bounds) > 0:
            Menv[index_bounds] *= rand_norm

        # fenv_rank = np.zeros(len(Menv))
        # for ibin in range(nbins):
        #     mmask = (halos['N']*Mpart > mbins[ibin]) \
        #     & (halos['N']*Mpart < mbins[ibin + 1])
        #     if np.sum(mmask) > 0:
        #         if np.sum(mmask) == 1:
        #             fenv_rank[mmask] = 0
        #         else:
        #             new_fenv_rank = Menv[mmask].argsort().argsort()
        #             fenv_rank[mmask] = new_fenv_rank / np.max(new_fenv_rank) - 0.5

        halos['fenv_rank'] = calc_fenv_opt(Menv, mbins, allmasses)

        # compute delta concentration
        print("computing c rank")
        halos_c = halos['r90_L2com'] / halos['r25_L2com']
        deltac_rank = np.zeros(len(halos))
        for ibin in range(nbins):
            mmask = (allmasses > mbins[ibin]) & (allmasses < mbins[ibin + 1])
            if np.sum(mmask) > 0:
                if np.sum(mmask) == 1:
                    deltac_rank[mmask] = 0
                else:
                    new_deltac = halos_c[mmask] - np.median(halos_c[mmask])
                    new_deltac_rank = new_deltac.argsort().argsort()
                    deltac_rank[mmask] = new_deltac_rank / np.max(
                        new_deltac_rank) - 0.5
        halos['deltac_rank'] = deltac_rank

    else:
        halos['fenv_rank'] = np.zeros(len(halos))
        halos['deltac_rank'] = np.zeros(len(halos))

    # the new particle start, len, and multiplier
    halos_pstart = halos['npstartA']
    halos_pnum = halos['npoutA']
    halos_pstart_new = np.zeros(len(halos))
    halos_pnum_new = np.zeros(len(halos))

    # particle arrays for ranks and mask
    mask_parts = np.zeros(len(parts))
    len_old = len(parts)
    ranks_parts = np.full(len_old, -1.0)
    ranksv_parts = np.full(len_old, -1.0)
    ranksr_parts = np.full(len_old, -1.0)
    ranksp_parts = np.full(len_old, -1.0)
    pos_parts = np.full((len_old, 3), -1.0)
    vel_parts = np.full((len_old, 3), -1.0)
    hvel_parts = np.full((len_old, 3), -1.0)
    Mh_parts = np.full(len_old, -1.0)
    Np_parts = np.full(len_old, -1.0)
    downsample_parts = np.full(len_old, -1.0)
    idh_parts = np.full(len_old, -1)
    deltach_parts = np.full(len_old, -1.0)
    fenvh_parts = np.full(len_old, -1.0)

    print("compiling particle subsamples")
    start_tracker = 0
    print(len(halos), np.sum(mask_halos))
    for j in np.arange(len(halos)):
        if j % 10000 == 0:
            print("halo id", j, end='\r')
        if mask_halos[j] and halos['npoutA'][j] > 0:
            # subsample_factor = subsample_particles(halos['N'][j] * Mpart, halos['npoutA'][j], MT)
            # submask = np.random.binomial(n = 1, p = subsample_factor, size = halos_pnum[j])
            submask = submask_particles(halos['N'][j] * Mpart,
                                        halos['npoutA'][j], MT)

            # updating the particles' masks, downsample factors, halo mass
            mask_parts[halos_pstart[j]:halos_pstart[j] +
                       halos_pnum[j]] = submask
            # print(j, halos_pstart, halos_pnum, p_halos, downsample_parts)
            downsample_parts[halos_pstart[j]:halos_pstart[j] +
                             halos_pnum[j]] = p_halos[j]
            hvel_parts[halos_pstart[j]:halos_pstart[j] +
                       halos_pnum[j]] = halos['v_L2com'][j]
            Mh_parts[halos_pstart[j]:halos_pstart[j] +
                     halos_pnum[j]] = halos['N'][j] * Mpart  # in msun / h
            Np_parts[halos_pstart[j]:halos_pstart[j] +
                     halos_pnum[j]] = np.sum(submask)
            idh_parts[halos_pstart[j]:halos_pstart[j] +
                      halos_pnum[j]] = halos['id'][j]
            deltach_parts[halos_pstart[j]:halos_pstart[j] +
                          halos_pnum[j]] = halos['deltac_rank'][j]
            fenvh_parts[halos_pstart[j]:halos_pstart[j] +
                        halos_pnum[j]] = halos['fenv_rank'][j]

            # updating the pstart, pnum, for the halos
            halos_pstart_new[j] = start_tracker
            halos_pnum_new[j] = np.sum(submask)
            start_tracker += np.sum(submask)

            if want_ranks:
                if np.sum(submask) == 0:
                    continue
                # extract particle index
                indices_parts = np.arange(halos_pstart[j], halos_pstart[j] +
                                          halos_pnum[j])[submask.astype(bool)]
                indices_parts = indices_parts.astype(int)
                if np.sum(submask) == 1:
                    ranks_parts[indices_parts] = 0
                    ranksv_parts[indices_parts] = 0
                    ranksp_parts[indices_parts] = 0
                    ranksr_parts[indices_parts] = 0
                    continue

                # make the rankings
                theseparts = parts[halos_pstart[j]:halos_pstart[j] +
                                   halos_pnum[j]][submask.astype(bool)]
                theseparts_pos = theseparts['pos']
                theseparts_vel = theseparts['vel']
                theseparts_halo_pos = halos['x_L2com'][j]
                theseparts_halo_vel = halos['v_L2com'][j]

                dist2_rel = np.sum((theseparts_pos - theseparts_halo_pos)**2,
                                   axis=1)
                newranks = dist2_rel.argsort().argsort()
                ranks_parts[indices_parts] = (
                    newranks - np.mean(newranks)) / np.mean(newranks)

                v2_rel = np.sum((theseparts_vel - theseparts_halo_vel)**2,
                                axis=1)
                newranksv = v2_rel.argsort().argsort()
                ranksv_parts[indices_parts] = (
                    newranksv - np.mean(newranksv)) / np.mean(newranksv)

                # get rps
                # calc relative positions
                r_rel = theseparts_pos - theseparts_halo_pos
                r0 = np.sqrt(np.sum(r_rel**2, axis=1))
                r_rel_norm = r_rel / r0[:, None]

                # list of peculiar velocities of the particles
                vels_rel = theseparts_vel - theseparts_halo_vel  # velocity km/s
                # relative speed to halo center squared
                v_rel2 = np.sum(vels_rel**2, axis=1)

                # calculate radial and tangential peculiar velocity
                vel_rad = np.sum(vels_rel * r_rel_norm, axis=1)
                newranksr = vel_rad.argsort().argsort()
                ranksr_parts[indices_parts] = (
                    newranksr - np.mean(newranksr)) / np.mean(newranksr)

                # radial component
                v_rad2 = vel_rad**2  # speed
                # tangential component
                v_tan2 = v_rel2 - v_rad2

                # compute the perihelion distance for NFW profile
                m = halos['N'][j] * Mpart / h  # in Msun (converted to kg below via the 2e30 factor)
                rs = halos['r25_L2com'][j]
                c = halos['r90_L2com'][j] / rs
                r0_kpc = r0 * 1000  # kpc
                alpha = 1.0 / (
                    np.log(1 + c) - c / (1 + c)
                ) * 2 * 6.67e-11 * m * 2e30 / r0_kpc / 3.086e+19 / 1e6

                # iterate a few times to solve for rp
                x2 = v_tan2 / (v_tan2 + v_rad2)

                num_iters = 20  # how many iterations do we want
                factorA = v_tan2 + v_rad2
                factorB = np.log(1 + r0_kpc / rs)
                for it in range(num_iters):
                    oldx = np.sqrt(x2)
                    x2 = v_tan2 / (
                        factorA + alpha *
                        (np.log(1 + oldx * r0_kpc / rs) / oldx - factorB))
                x2[np.isnan(x2)] = 1
                # final perihelion distance
                rp2 = r0_kpc**2 * x2
                newranksp = rp2.argsort().argsort()
                ranksp_parts[indices_parts] = (
                    newranksp - np.mean(newranksp)) / np.mean(newranksp)

        else:
            halos_pstart_new[j] = -1
            halos_pnum_new[j] = -1

    halos['npstartA'] = halos_pstart_new
    halos['npoutA'] = halos_pnum_new
    halos['randoms'] = np.random.random(len(halos))  # attaching random numbers
    halos['randoms_gaus_vrms'] = np.random.normal(
        loc=0, scale=halos["sigmav3d_L2com"] / np.sqrt(3),
        size=len(halos))  # attaching random numbers

    # output halo file
    print("outputting new halo file ")
    # output_dir = savedir+'/halos_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushodMT_new.h5'
    if os.path.exists(outfilename_halos):
        os.remove(outfilename_halos)
    newfile = h5py.File(outfilename_halos, 'w')
    dataset = newfile.create_dataset('halos', data=halos[mask_halos])
    newfile.close()

    # output the new particle file
    print("adding rank fields to particle data ")
    mask_parts = mask_parts.astype(bool)
    parts = parts[mask_parts]
    print("pre process particle number ", len_old,
          " post process particle number ", len(parts))
    if want_ranks:
        parts['ranks'] = ranks_parts[mask_parts]
        parts['ranksv'] = ranksv_parts[mask_parts]
        parts['ranksr'] = ranksr_parts[mask_parts]
        parts['ranksp'] = ranksp_parts[mask_parts]
    parts['downsample_halo'] = downsample_parts[mask_parts]
    parts['halo_vel'] = hvel_parts[mask_parts]
    parts['halo_mass'] = Mh_parts[mask_parts]
    parts['Np'] = Np_parts[mask_parts]
    parts['halo_id'] = idh_parts[mask_parts]
    parts['randoms'] = np.random.random(len(parts))
    parts['halo_deltac'] = deltach_parts[mask_parts]
    parts['halo_fenv'] = fenvh_parts[mask_parts]

    print("are there any negative particle values? ",
          np.sum(parts['downsample_halo'] < 0), np.sum(parts['halo_mass'] < 0))
    print("outputting new particle file ")
    # output_dir = savedir+'/particles_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushodMT_new.h5'
    if os.path.exists(outfilename_particles):
        os.remove(outfilename_particles)
    newfile = h5py.File(outfilename_particles, 'w')
    dataset = newfile.create_dataset('particles', data=parts)
    newfile.close()

    print("pre process particle number ", len_old,
          " post process particle number ", len(parts))
Example #19
def main():
    parser = ArgumentParser(
        description='prepare downsample abacus box for tabulation')
    parser.add_argument('--inifile', '-ini', help='config file for downsample')
    args = parser.parse_args()

    config = ConfigObj(args.inifile)

    abacusbox = config['abacusbox']
    redshift = config['redshift']
    outpath = config['outpath']
    frac = config.as_float('partfrac')
    seed = config.as_int('randseed')
    mcut = config.as_float('masscut')
    step = config.as_bool('halostep')
    rsd = config.as_bool('usersd')

    print(abacusbox, redshift, outpath, frac, seed, mcut, step, rsd)

    abacuscat = sorted(
        glob.glob('/global/cfs/cdirs/desi/cosmosim/Abacus/' + abacusbox +
                  '/halos/' + redshift + '/halo_info/*.asdf'))

    testcat = CompaSOHaloCatalog(abacuscat[0], fields=['N'], cleaned=True)

    simParams = {}
    simParams['redshift'] = testcat.header['Redshift']
    simParams['h'] = testcat.header['H0']
    simParams['boxsize'] = testcat.header['BoxSize']
    simParams['rsd'] = 1 / (testcat.header['VelZSpace_to_kms'] /
                            testcat.header['BoxSize'])
    simParams['Mpart'] = testcat.header['ParticleMassHMsun']
    simParams['Ncut'] = mcut / simParams['Mpart']
    print(simParams)
    print('Ncut =', simParams['Ncut'])

    for islab in range(9):
        print('working on slab', islab)

        if (islab == 8):
            cat = CompaSOHaloCatalog(
                abacuscat[islab * 4:],
                fields=['N', 'x_L2com', 'v_L2com', 'npstartA', 'npoutA'],
                subsamples=dict(A=True, rv=True),
                cleaned=True)
        else:
            cat = CompaSOHaloCatalog(
                abacuscat[islab * 4:4 + islab * 4],
                fields=['N', 'x_L2com', 'v_L2com', 'npstartA', 'npoutA'],
                subsamples=dict(A=True, rv=True),
                cleaned=True)

        print('finish reading slab')
        haloslab, partslab = prep_halo_part(cat.halos, cat.subsamples, frac,
                                            rsd, step, seed + islab, simParams)
        print('finish prep')
        haloslab.write(outpath + 'halos_%02d.hdf5' % islab, path='data')
        partslab.write(outpath + 'parts_%02d.hdf5' % islab, path='data')
        del cat
        del haloslab
        del partslab
        gc.collect()

    return 0
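
The script above reads its parameters with ConfigObj; a hypothetical ini file matching the config[...] and config.as_*() accesses in main() (all values are placeholders) could be written out like this.

# hypothetical downsample config; keys mirror the config accesses above
ini_text = """\
abacusbox = AbacusSummit_base_c000_ph000
redshift = z0.500
outpath = /path/to/output/
partfrac = 0.05
randseed = 300
masscut = 1e12
halostep = True
usersd = True
"""
with open('downsample.ini', 'w') as f:
    f.write(ini_text)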
Example #20
def main(sim_name,
         z_start,
         z_stop,
         compaso_parent,
         catalog_parent,
         merger_parent,
         save_pos=False,
         purge=False,
         complete=False,
         want_subsample_B=True):

    compaso_parent = Path(compaso_parent)
    catalog_parent = Path(catalog_parent)
    merger_parent = Path(merger_parent)

    # directory where the CompaSO halo catalogs are saved
    cat_dir = compaso_parent / sim_name / "halos"
    clean_dir = compaso_parent / "cleaning" / sim_name

    # obtain the redshifts of the CompaSO catalogs
    redshifts = glob.glob(os.path.join(cat_dir, "z*"))
    zs_cat = [extract_redshift(redshifts[i]) for i in range(len(redshifts))]

    # directory where we save the final outputs
    cat_lc_dir = catalog_parent / "halo_light_cones" / sim_name

    # directory where the merger tree files are kept
    merger_dir = merger_parent / sim_name

    # if merger tree redshift information has been saved, load it (if not, save it)
    if not os.path.exists(Path("data_mt") / sim_name / "zs_mt.npy"):
        # all merger tree snapshots and corresponding redshifts
        snaps_mt = sorted(merger_dir.glob("associations_z*.0.asdf"))
        zs_mt = get_zs_from_headers(snaps_mt)
        os.makedirs(Path("data_mt") / sim_name, exist_ok=True)
        np.save(Path("data_mt") / sim_name / "zs_mt.npy", zs_mt)
    zs_mt = np.load(Path("data_mt") / sim_name / "zs_mt.npy")

    # names of the merger tree file for a given redshift
    merger_fns = list(merger_dir.glob("associations_z%4.3f.*.asdf" % zs_mt[0]))

    # number of superslabs
    n_superslabs = len(merger_fns)
    print("number of superslabs = ", n_superslabs)

    # all redshifts, steps and comoving distances of light cones files; high z to low z
    # remove presaving after testing done (or make sure presaved can be matched with simulation)
    if not os.path.exists(
            Path("data_headers") / sim_name /
            "coord_dist.npy") or not os.path.exists(
                Path("data_headers") / sim_name /
                "redshifts.npy") or not os.path.exists(
                    Path("data_headers") / sim_name / "eta_drift.npy"):
        zs_all, steps_all, chis_all, etad_all = get_lc_info(
            Path("all_headers") / sim_name)
        os.makedirs(Path("data_headers") / sim_name, exist_ok=True)
        np.save(Path("data_headers") / sim_name / "redshifts.npy", zs_all)
        np.save(Path("data_headers") / sim_name / "steps.npy", steps_all)
        np.save(Path("data_headers") / sim_name / "coord_dist.npy", chis_all)
        np.save(Path("data_headers") / sim_name / "eta_drift.npy", etad_all)
    zs_all = np.load(Path("data_headers") / sim_name / "redshifts.npy")
    chis_all = np.load(Path("data_headers") / sim_name / "coord_dist.npy")
    etad_all = np.load(Path("data_headers") / sim_name / "eta_drift.npy")
    zs_all[-1] = float(
        "%.1f" % zs_all[-1]
    )  # LHG: I guess this is trying to match up to some filename or something?

    # fields to copy directly from the halo_info files
    raw_dic = {}
    with asdf.open(
            str(cat_dir / ("z%.3f" % zs_cat[0]) / 'halo_info' /
                'halo_info_000.asdf')) as f:
        for key in f['data'].keys():
            if 'L2' not in key: continue
            try:
                raw_dic[key] = (f['data'][key].dtype, f['data'][key].shape[1])
            except:
                raw_dic[key] = f['data'][key].dtype
        header = f['header']  # just for getting the name of the redshift

    # just for testing; remove for final version
    if want_subsample_B:
        fields_cat = [
            'npstartA', 'npoutA', 'npstartB', 'npoutB', 'N', 'v_L2com',
            'x_L2com'
        ]  #, 'id', 'x_L2com', 'sigmav3d_L2com', 'r90_L2com', 'r25_L2com']
        subsample_str = 'AB'
    else:
        fields_cat = ['npstartA', 'npoutA', 'N', 'v_L2com', 'x_L2com'
                      ]  #, 'id', 'sigmav3d_L2com', 'r90_L2com', 'r25_L2com']
        subsample_str = 'A'

    # main progenitor fields of interest
    fields_cat_mp = [
        'haloindex', 'haloindex_mainprog', 'v_L2com_mainprog', 'N_mainprog'
    ]

    # get functions relating chi and z
    chi_of_z = interp1d(zs_all, chis_all)
    etad_of_chi = interp1d(chis_all, etad_all)
    z_of_chi = interp1d(chis_all, zs_all)

    # initial redshift where we start building the trees
    ind_start = np.argmin(np.abs(zs_mt - z_start))
    ind_stop = np.argmin(np.abs(zs_mt - z_stop))

    # directory where we save the current state
    os.makedirs(cat_lc_dir / "tmp", exist_ok=True)
    if purge:
        # delete the existing temporary files
        tmp_files = list((cat_lc_dir / "tmp").glob("haloindex_*"))
        for i in range(len(tmp_files)):
            os.unlink(str(tmp_files[i]))

    # loop over each merger tree redshift
    for i in range(ind_start, ind_stop + 1):

        # starting snapshot
        z_mt = zs_mt[i]
        z_mt_mp = zs_mt[i + 1]
        z_cat = zs_cat[np.argmin(np.abs(z_mt - zs_cat))]
        print("Redshift = %.3f %.3f" % (z_mt, z_cat))

        # the names of the folders need to be standardized
        zname_mt = min(header['L1OutputRedshifts'],
                       key=lambda z: abs(z - z_mt))

        # convert the redshifts into comoving distance
        chi_mt = chi_of_z(z_mt)
        chi_mt_mp = chi_of_z(z_mt_mp)

        # catalog directory
        catdir = cat_dir / ("z%.3f" % z_cat)

        # names of the merger tree file for this redshift
        merger_fns = list(merger_dir.glob("associations_z%4.3f.*.asdf" % z_mt))
        for counter in range(len(merger_fns)):
            merger_fns[counter] = str(merger_fns[counter])

        # slab indices and number of halos per slab
        N_halo_slabs, slabs = get_halos_per_slab(merger_fns, minified=False)
        N_halo_total = np.sum(N_halo_slabs)

        # names of the light cone merger tree file for this redshift
        merger_lc_fns = list(
            (cat_lc_dir / ("z%.3f" % zname_mt)).glob("Merger_lc*.asdf"))
        for counter in range(len(merger_lc_fns)):
            merger_lc_fns[counter] = str(merger_lc_fns[counter])

        # slab indices, origins and number of halos per slab
        N_halo_slabs_lc, slabs_lc, origins_lc = get_halos_per_slab_origin(
            merger_lc_fns, minified=False)

        # total number of halos in this light cone redshift
        N_lc = np.sum(N_halo_slabs_lc)
        print("total number of lc halos = ", N_lc)
        if N_lc == 0: continue

        # create a new dictionary with translations of merger names
        key_dic = {
            'HaloIndex': ['index_halo', np.int64],
            'InterpolatedPosition': ['pos_interp', (np.float32, 3)],
            'InterpolatedVelocity': ['vel_interp', (np.float32, 3)],
            'InterpolatedComoving': ['redshift_interp', np.float32],
            'LightConeOrigin': ['origin', np.int8],
        }

        # Merger_lc should have all fields (compaso + mainprog (not anymore) + interpolated)
        cols = {
            field: np.zeros(N_lc, dtype=user_dt[field])
            for field in fields_cat
        }
        fields = list(fields_cat)

        # additional fields for the light cones
        for key in key_dic.keys():
            cols[key_dic[key][0]] = np.zeros(N_lc, dtype=key_dic[key][1])

        # updating the mainprog here
        with asdf.open(
                str(clean_dir / ("z%.3f" % z_cat) / 'cleaned_halo_info' /
                    'cleaned_halo_info_000.asdf')) as f:  # og
            # add mainprog stuff to the raw dictionary
            for key in fields_cat_mp:
                try:
                    raw_dic[key] = (f['data'][key].dtype,
                                    f['data'][key].shape[1])
                except:
                    raw_dic[key] = f['data'][key].dtype

        # adding the raw halo info fields
        for key in raw_dic.keys():
            cols[key] = np.zeros(N_lc, dtype=raw_dic[key])
        # adding interpolated mass
        cols['N_interp'] = np.zeros(N_lc, dtype=user_dt['N'])
        Merger_lc = Table(cols, copy=False)

        # if we want to complete to z = 0, then turn on complete for z = 0.1 (we don't have shells past that)
        if complete and np.abs(z_mt - 0.1) < 1.e-3:
            save_z0 = True
        else:
            save_z0 = False

        # initialize index for filling halo information
        start = 0
        file_no = 0

        # offset for correcting halo indices
        offset = 0

        # counts particles
        count = 0

        # loop over each superslab
        for k in range(n_superslabs):
            # assert superslab number is correct
            assert slabs[k] == k, "the superslabs are not matching"

            # origins for which information is available
            origins_k = origins_lc[slabs_lc == k]

            if len(origins_k) == 0:
                # offset all halos in given superslab
                offset += N_halo_slabs[k]
                continue

            # list of halo indices
            halo_info_list = []
            for i in [0, 1, -1]:
                # TESTING: depends on whether the B particles are in the normal location, in sownak's, or in mine
                #halo_info_list.append(str(catdir / 'halo_info' / ('halo_info_%03d.asdf'%((k+i)%n_superslabs)))) # og
                #halo_info_list.append(str(Path("/global/cscratch1/sd/sbose/subsample_B_particles") / sim_name / "halos"/ ("z%.3f"%z_cat) / 'halo_info' / ('halo_info_%03d.asdf'%((k+i)%n_superslabs)))) # this one is different and is used when it's cleaning/cleaned_halos (i.e. the particles live somewhere else)
                halo_info_list.append(
                    str(
                        Path(
                            "/global/cscratch1/sd/boryanah/data_hybrid/tape_data"
                        ) / sim_name / "halos" / ("z%.3f" % z_cat) /
                        'halo_info' / ('halo_info_%03d.asdf' %
                                       ((k + i) % n_superslabs)))
                )  # ako sownak si iztrie tupite chastici
            # corresponding cleaned halo info file names (these carry the merger tree fields)
            cleaned_halo_info_list = []
            for i in [0, 1, -1]:
                cleaned_halo_info_list.append(
                    str(clean_dir / ("z%.3f" % z_cat) / 'cleaned_halo_info' /
                        ('cleaned_halo_info_%03d.asdf' %
                         ((k + i) % n_superslabs))))

            print("loading halo info files = ", halo_info_list)
            print("loading fields = ", fields)
            # load the CompaSO catalogs
            if (save_pos or save_z0):
                try:
                    cat = CompaSOHaloCatalog(
                        halo_info_list,
                        load_subsamples=f'{subsample_str:s}_halo_all',
                        fields=fields,
                        unpack_bits=False)
                    loaded_pos = True
                except Exception:
                    # fall back to loading only PIDs if the position/velocity subsamples are unavailable
                    cat = CompaSOHaloCatalog(
                        halo_info_list,
                        load_subsamples=f'{subsample_str:s}_halo_pid',
                        fields=fields,
                        unpack_bits=False)
                    loaded_pos = False
            else:
                cat = CompaSOHaloCatalog(
                    halo_info_list,
                    load_subsamples=f'{subsample_str:s}_halo_pid',
                    fields=fields,
                    unpack_bits=False,
                    cleandir=str(compaso_parent / "cleaning"))
                #cat = CompaSOHaloCatalog(halo_info_list, load_subsamples=f'{subsample_str:s}_halo_pid', fields=fields, unpack_bits=False, cleaned=False)
                loaded_pos = False

            # load the rest of the parameters in compressed format
            cols = {}
            for key in raw_dic.keys():
                cols[key] = np.zeros(len(cat.halos), dtype=raw_dic[key])
            compressed_data = Table(cols, copy=False)
            new_count = 0
            for i in range(len(halo_info_list)):
                with asdf.open(halo_info_list[i]) as f:
                    for key in f['data'].keys():
                        if key in compressed_data.keys():
                            compressed_data[key][new_count:new_count +
                                                 len(f['data'][key]
                                                     )] = f['data'][key][:]
                    # all columns in a halo_info file have the same length, so the last key's length works here
                    new_count += len(f['data'][key])
            # adding merger tree fields
            new_count = 0
            for i in range(len(cleaned_halo_info_list)):
                with asdf.open(cleaned_halo_info_list[i]) as f:
                    for key in f['data'].keys():
                        if key in fields_cat_mp:
                            compressed_data[key][new_count:new_count +
                                                 len(f['data'][key]
                                                     )] = f['data'][key][:]
                    # as above, all columns share the same length
                    new_count += len(f['data'][key])

            # loop over each observer origin
            for o in origins_k:

                # number of halos in this file
                num = N_halo_slabs_lc[file_no]
                file_no += 1

                print("origin, superslab, N_halo_slabs_lc", o, k, num)
                # skip if none
                if num == 0: continue

                # load the light cone arrays
                with asdf.open(cat_lc_dir / ("z%.3f" % zname_mt) /
                               ("Merger_lc%d.%02d.asdf" % (o, k)),
                               lazy_load=True,
                               copy_arrays=True) as f:
                    merger_lc = f['data']

                # the merger file should agree with the per-slab bookkeeping
                N_halo_lc = len(merger_lc['HaloIndex'])
                assert N_halo_lc == num, "file order is messed up"

                # translate information from this file to the complete array
                for key in merger_lc.keys():
                    Merger_lc[key_dic[key][0]][start:start +
                                               num] = merger_lc[key][:]

                # adding information about which lightcone the halo belongs to
                Merger_lc['origin'][start:start + num] = np.repeat(
                    o, num).astype(np.int8)

                # halo index and velocity
                halo_ind_lc = Merger_lc['index_halo'][start:start + num]
                halo_ind_lc = correct_all_inds(halo_ind_lc, N_halo_slabs,
                                               slabs, n_superslabs)
                halo_ind_lc = (halo_ind_lc - offset) % N_halo_total
                vel_interp_lc = Merger_lc['vel_interp'][start:start + num]

                # correct halo indices
                correction = N_halo_slabs[k] + N_halo_slabs[
                    (k + 1) % n_superslabs] + N_halo_slabs[
                        (k - 1) % n_superslabs] - N_halo_total
                halo_ind_lc[halo_ind_lc > N_halo_total -
                            N_halo_slabs[(k - 1) % n_superslabs]] += correction
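                # (added note) a sketch of what the correction does, with illustrative numbers: suppose
                # n_superslabs = 4 with N_halo_slabs = [100, 200, 300, 400] (N_halo_total = 1000) and k = 1.
                # The loaded slabs are [1, 2, 0], i.e. 600 halos. After subtracting the offset and taking the
                # modulus, slab-1 halos land at 0-199 and slab-2 halos at 200-499, but slab-0 halos wrap to
                # roughly 900-999; correction = 600 - 1000 = -400 shifts them down to ~500-599 so that they
                # index correctly into the concatenated three-slab halo table.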

                # cut the halos that are not part of this catalog from the halo table
                halo_table = cat.halos[halo_ind_lc]

                header = cat.header
                N_halos = len(cat.halos)
                print("N_halos = ", N_halos)
                assert N_halos == N_halo_total + correction, "mismatch between halo number in compaso catalog and in merger tree"

                # cut the halos that are not part of this catalog from the compressed data
                compressed_data_o = compressed_data[halo_ind_lc]

                # load eligibility information if it exists
                if os.path.exists(cat_lc_dir / "tmp" /
                                  ("haloindex_z%4.3f_lc%d.%02d.npy" %
                                   (z_mt, o, k))):
                    haloindex_ineligible = np.load(
                        cat_lc_dir / "tmp" /
                        ("haloindex_z%4.3f_lc%d.%02d.npy" % (z_mt, o, k)))

                    # find the halos in halo_table that have been marked ineligible and get rid of them
                    mask_ineligible = np.in1d(compressed_data_o['haloindex'],
                                              haloindex_ineligible)

                    # do not drop the rows here: the halo/particle indexing below relies on num and on the running
                    # particle count, so instead zero out the ineligible halos
                    #halo_table = halo_table[mask_ineligible]
                    halo_table['N'][mask_ineligible] = 0
                    halo_table['npstartA'][
                        mask_ineligible] = -999  # note unsigned integer
                    halo_table['npoutA'][mask_ineligible] = 0
                    if want_subsample_B:
                        halo_table['npstartB'][
                            mask_ineligible] = -999  # note unsigned integer
                        halo_table['npoutB'][mask_ineligible] = 0
                    print(
                        "percentage surviving halos after eligibility = ",
                        100. *
                        (1 - np.sum(mask_ineligible) / len(mask_ineligible)))

                # load the particle ids
                pid = cat.subsamples['pid']
                if (save_pos or save_z0) and loaded_pos:
                    pos = cat.subsamples['pos']
                    vel = cat.subsamples['vel']

                # reindex npstart and npout for the new catalogs
                npstartA = halo_table['npstartA']
                npoutA = halo_table['npoutA']
                # select the pids in this halo light cone, and index into them starting from 0
                if want_subsample_B:
                    npstartB = halo_table['npstartB']
                    npoutB = halo_table['npoutB']

                    if (save_pos or save_z0) and loaded_pos:
                        pid_new, pos_new, vel_new, npstart_new, npout_new, npout_new_B = reindex_pid_pos_vel_AB(
                            pid, pos, vel, npstartA, npoutA, npstartB, npoutB)
                        del pid, pos, vel
                    else:
                        pid_new, npstart_new, npout_new, npout_new_B = reindex_pid_AB(
                            pid, npstartA, npoutA, npstartB, npoutB)
                        del pid
                    del npstartA, npoutA, npstartB, npoutB
                else:
                    if (save_pos or save_z0) and loaded_pos:
                        pid_new, pos_new, vel_new, npstart_new, npout_new = reindex_pid_pos_vel(
                            pid, pos, vel, npstartA, npoutA)
                        del pid, pos, vel
                    else:
                        pid_new, npstart_new, npout_new = reindex_pid(
                            pid, npstartA, npoutA)
                        del pid
                    del npstartA, npoutA
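                # (added note) reindex_pid_AB / reindex_pid (defined elsewhere in this module) are assumed to
                # gather the subsample particles of the selected halos into one contiguous array and return new
                # npstart/npout arrays that index into that array starting from 0, which is what the asserts
                # below check.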

                # assert that indexing is right
                if want_subsample_B:
                    assert np.sum(npout_new + npout_new_B) == len(
                        pid_new), "mismatching indexing"
                else:
                    assert np.sum(npout_new) == len(
                        pid_new), "mismatching indexing"

                # offset for this superslab and origin
                Merger_lc['npstartA'][start:start + num] = npstart_new + count
                Merger_lc['npoutA'][start:start + num] = npout_new
                if want_subsample_B:
                    Merger_lc['npoutB'][start:start + num] = npout_new_B
                    del npout_new_B
                del npstart_new, npout_new

                # increment number of particles in superslab and origin
                count += len(pid_new)

                # create particle array
                if (save_pos or save_z0) and loaded_pos:
                    pid_table = Table({
                        'pid':
                        np.zeros(len(pid_new), pid_new.dtype),
                        'pos':
                        np.zeros((len(pid_new), 3), pos_new.dtype),
                        'vel':
                        np.zeros((len(pid_new), 3), vel_new.dtype)
                    })
                    pid_table['pid'] = pid_new
                    pid_table['pos'] = pos_new
                    pid_table['vel'] = vel_new
                    del pid_new, pos_new, vel_new
                else:
                    pid_table = Table(
                        {'pid': np.zeros(len(pid_new), pid_new.dtype)})
                    pid_table['pid'] = pid_new
                    del pid_new
                # save the particles
                save_asdf(pid_table, "pid_lc%d.%02d" % (o, k), header,
                          cat_lc_dir / ("z%4.3f" % zname_mt))
                del pid_table

                # halos that were not interpolated get their velocity from the halo info files
                not_interp = np.sum(np.abs(vel_interp_lc), axis=1) < 1.e-6
                print("percentage not interpolated = ",
                      100. * np.sum(not_interp) / len(not_interp))
                vel_interp_lc[not_interp] = halo_table['v_L2com'][not_interp]

                # halos with merger tree info (0 for merged or small halos, -999 for no info)
                mask_info = compressed_data_o['haloindex_mainprog'][:] > 0
                print("percentage without merger tree info = ",
                      100. * (1. - np.sum(mask_info) / len(mask_info)))
                print("percentage of removed halos = ",
                      np.sum(halo_table['N'] == 0) * 100. / len(mask_info))
                # At redshifts later than z_start of build_mt we carry halos from earlier times, so a halo may have
                # had merger tree info at some point and lost it since; there is also the newer condition of tracing
                # back half a lifetime. The first count can exceed the sum of the other two because it also includes
                # other cases (e.g. splits).
                assert np.sum(~mask_info) >= np.sum(not_interp) + np.sum(
                    halo_table['N'] == 0
                ), "Different number of halos with merger tree info and halos that have been interpolated"
                del not_interp

                # interpolated velocity v = v1 + (v2-v1)/(chi1-chi2)*(chi-chi2) because -d(chi) = d(eta)
                a_avg = (halo_table['v_L2com'] -
                         compressed_data_o['v_L2com_mainprog']) / (chi_mt_mp -
                                                                   chi_mt)
                v_star = compressed_data_o['v_L2com_mainprog'] + a_avg * (
                    chi_mt_mp - merger_lc['InterpolatedComoving'][:, None])
                vel_interp_lc[mask_info] = v_star[mask_info]
                del a_avg, v_star
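                # (added note) worked example with made-up numbers: v_L2com_mainprog = 100 km/s at
                # chi_mt_mp = 1800 Mpc/h and v_L2com = 120 km/s at chi_mt = 1700 Mpc/h give a_avg = 0.2, so a halo
                # interpolated to chi = 1750 Mpc/h gets v* = 100 + 0.2*(1800 - 1750) = 110 km/s; the same linear
                # interpolation in chi is applied to the mass below.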

                # save the velocity information
                Merger_lc['vel_interp'][start:start + num] = vel_interp_lc
                del vel_interp_lc

                # interpolated mass m = m1 + (m2-m1)/(chi1-chi2)*(chi-chi2) because dt = -dchi
                # compute the derivative
                try:
                    mdot = (halo_table['N'].astype(float) -
                            compressed_data_o['N_mainprog'][:, 0].astype(float)
                            ) / (chi_mt_mp - chi_mt)
                    m_star = compressed_data_o['N_mainprog'][:, 0].astype(
                        float) + mdot * (chi_mt_mp -
                                         merger_lc['InterpolatedComoving'])
                except IndexError:
                    # only needed at the last available redshift, for which N_mainprog is 1D
                    mdot = (halo_table['N'].astype(float) -
                            compressed_data_o['N_mainprog'].astype(float)) / (
                                chi_mt_mp - chi_mt)
                    m_star = compressed_data_o['N_mainprog'].astype(
                        float) + mdot * (chi_mt_mp -
                                         merger_lc['InterpolatedComoving'])

                # zero out negative masses, which occur for halos with zero mass today or for halos carried over from the previous redshift (i.e. interpolating from 1/2 to 1 rather than 1 to 3/2)
                m_star[m_star < 0.] = 0.
                m_star = np.round(m_star).astype(halo_table['N'].dtype)
                # record the interpolated mass for each halo
                Merger_lc['N_interp'][start:start +
                                      num][mask_info] = m_star[mask_info]

                # mark the halos that don't have merger tree info
                Merger_lc['origin'][start:start + num][~mask_info] += 3

                # for these halos, we can pseudo interpolate their position but keep the mass unchanged
                Merger_lc['N_interp'][start:start + num][
                    ~mask_info] = halo_table['N'][~mask_info]
                # Buba's earlier attempt:
                #Merger_lc['pos_interp'][start:start+num][~mask_info] = merger_lc['InterpolatedPosition'][~mask_info]# + halo_table['v_L2com'][~mask_info]*(chi_mt - merger_lc['InterpolatedComoving'][:, None])[~mask_info]
                # For a simulation particle with canonical velocity v1 drifting from z1 to z2, the position is advanced
                # as x2 = x1 + v1*(etaD(z2) - etaD(z1)). The etaD are drift factors, computed as Delta etaD = \int_t1^t2 dt/a^2
                # and stored in the state headers; velocities are in canonical units and x1, x2 are in unit-box comoving coordinates.
                tmp = (merger_lc['InterpolatedComoving'][~mask_info])
                tmp[tmp < np.min(chis_all)] = np.min(chis_all)
                merger_lc['InterpolatedComoving'][~mask_info] = tmp
                del tmp
                Merger_lc['pos_interp'][start:start + num][~mask_info] = (
                    merger_lc['InterpolatedPosition'][~mask_info] /
                    header['BoxSizeHMpc'] +
                    compressed_data_o['v_L2com'][~mask_info] *
                    header['VelZSpace_to_Canonical'] *
                    (etad_of_chi(merger_lc['InterpolatedComoving'][~mask_info,
                                                                   None]) -
                     etad_of_chi(chi_mt))) * header['BoxSizeHMpc']
                # + halo_table['v_L2com'][~mask_info]*(chi_mt - merger_lc['InterpolatedComoving'][:, None])[~mask_info]
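                # (added note) a sketch of the unit handling above, under the assumption that the header keys mean
                # what their names suggest: InterpolatedPosition is divided by BoxSizeHMpc to go to unit-box
                # coordinates, the canonical velocity v_L2com * VelZSpace_to_Canonical is multiplied by the
                # drift-factor difference etaD(chi_interp) - etaD(chi_mt), and the result is scaled back to Mpc/h
                # with BoxSizeHMpc.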

                # units -- todo: test
                del m_star, mdot

                # copy the rest of the halo fields
                for key in fields_cat:
                    # from the CompaSO fields, those have already been reindexed
                    if key == 'npstartA' or key == 'npoutA': continue
                    if key == 'npstartB' or key == 'npoutB': continue
                    Merger_lc[key][start:start + num] = halo_table[key][:]

                # copy all L2com compressed fields to Merger_lc
                for key in compressed_data.keys():
                    Merger_lc[key][start:start +
                                   num] = compressed_data_o[key][:]

                # save information about halos that were used in this catalog and have merger tree information
                np.save(
                    cat_lc_dir / "tmp" / ("haloindex_z%4.3f_lc%d.%02d.npy" %
                                          (z_mt_mp, o, k)),
                    compressed_data_o['haloindex_mainprog'][mask_info])
                del mask_info
                del halo_table

                # add halos in this file
                start += num

            # offset all halos in given superslab
            offset += N_halo_slabs[k]
            del cat

        assert len(Merger_lc['redshift_interp']
                   ) == start, "Are you missing some halos?"
        # redshift_interp still holds comoving distance here; clamp values below the minimum tabulated distance
        # (which can happen near z = 0.1) before converting chi to z
        Merger_lc['redshift_interp'][
            Merger_lc['redshift_interp'] < np.min(chis_all)] = np.min(chis_all)
        Merger_lc['redshift_interp'] = z_of_chi(
            Merger_lc['redshift_interp']).astype(np.float32)

        # save to files
        save_asdf(Merger_lc, "halo_info_lc", header,
                  cat_lc_dir / ("z%4.3f" % zname_mt))
        del Merger_lc

        # loop over each superslab
        file_no = 0
        offset = 0
        for k in range(n_superslabs):
            # origins for which information is available
            origins_k = origins_lc[slabs_lc == k]

            # loop over each observer origin
            for o in origins_k:

                with asdf.open(cat_lc_dir / ("z%4.3f" % zname_mt) /
                               ("pid_lc%d.%02d.asdf" % (o, k)),
                               lazy_load=True,
                               copy_arrays=True) as f:
                    pid_lc = f['data']['pid'][:]
                    if (save_pos or save_z0) and loaded_pos:
                        pos_lc = f['data']['pos'][:]
                        vel_lc = f['data']['vel'][:]
                if file_no == 0:
                    if (save_pos or save_z0) and loaded_pos:
                        pid_table = Table({
                            'pid':
                            np.zeros(count, pid_lc.dtype),
                            'pos':
                            np.zeros((count, 3), pos_lc.dtype),
                            'vel':
                            np.zeros((count, 3), vel_lc.dtype)
                        })
                    else:
                        pid_table = Table(
                            {'pid': np.zeros(count, pid_lc.dtype)})

                pid_table['pid'][offset:offset + len(pid_lc)] = pid_lc
                if (save_pos or save_z0) and loaded_pos:
                    pid_table['pos'][offset:offset + len(pid_lc)] = pos_lc
                    pid_table['vel'][offset:offset + len(pid_lc)] = vel_lc
                file_no += 1
                offset += len(pid_lc)
        assert offset == count, "Missing particles somewhere"
        save_asdf(pid_table, "pid_lc", header,
                  cat_lc_dir / ("z%4.3f" % zname_mt))

        gc.collect()
Exemplo n.º 21
0
def prepare_slab(i, savedir, simdir, simname, z_mock, tracer_flags, MT, want_ranks, N_dim, newseed):
    outfilename_halos = savedir+'/halos_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushod'
    outfilename_particles = savedir+'/particles_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushod'
    if MT:
        outfilename_halos += '_MT'
        outfilename_particles += '_MT'
    if want_ranks:
        outfilename_particles += '_withranks'
    outfilename_particles += '_new.h5'
    outfilename_halos += '_new.h5'

    np.random.seed(newseed + i)
    # # if file already exists, just skip
    # if os.path.exists(outfilename_halos) \
    # and os.path.exists(outfilename_particles):
    #     return 0

    # load the halo catalog slab
    print("loading halo catalog ")
    start = time.time()
    cat = CompaSOHaloCatalog(
        simdir+simname+'/halos/z'+str(z_mock).ljust(5, '0')+'/halo_info/halo_info_'\
        +str(i).zfill(3)+'.asdf', load_subsamples = 'A_halo_rv', fields = ['N', 
        'x_L2com', 'v_L2com', 'r90_L2com', 'r25_L2com', 'npstartA', 'npoutA', 'id', 'sigmav3d_L2com'])
    halos = cat.halos
    parts = cat.subsamples
    header = cat.header
    Lbox = cat.header['BoxSizeHMpc']
    Mpart = header['ParticleMassHMsun'] # msun / h 
    H0 = header['H0']
    h = H0/100.0
    print("finished loading halo catalog", time.time() - start)
    print("number of halos ", len(halos), "max halo mass", np.max(halos['N']) * Mpart,
        "min halo mass", np.min(halos['N']) * Mpart, "particle mass ", Mpart)
    # # form a halo table of the columns i care about 
    # creating a mask of which halos to keep, which halos to drop
    p_halos = subsample_halos(halos['N']*Mpart, MT)
    mask_halos = np.random.random(len(halos)) < p_halos
    print("total number of halos, ", len(halos), "keeping ", np.sum(mask_halos))

    halos['mask_subsample'] = mask_halos
    halos['multi_halos'] = 1.0 / p_halos

    nbins = 100
    mbins = np.logspace(np.log10(3e10), 15.5, nbins + 1)
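    # (added note) np.logspace takes log10 exponents, so these bin edges run from 10**10.48 (~3e10) to
    # 10**15.5 in halo mass (Msun/h, since they are compared against halos['N']*Mpart below).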

    print("computing density rank")
    start = time.time()
    dens_grid = np.array(h5py.File(savedir+"/density_field.h5", 'r')['dens'])
    ixs = np.floor((np.array(halos['x_L2com']) + Lbox/2) / (Lbox/N_dim)).astype(int) % N_dim  # np.int is removed in newer NumPy
    halos_overdens = dens_grid[ixs[:, 0], ixs[:, 1], ixs[:, 2]]
    print("done overdensity array")
    fenv_rank = np.zeros(len(halos))
    for ibin in range(nbins):
        mmask = (halos['N']*Mpart > mbins[ibin]) & (halos['N']*Mpart < mbins[ibin + 1])
        if np.sum(mmask) > 0:
            if np.sum(mmask) == 1:
                fenv_rank[mmask] = 0
            else:
                new_fenv_rank = halos_overdens[mmask].argsort().argsort()
                fenv_rank[mmask] = new_fenv_rank / np.max(new_fenv_rank) - 0.5
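    # (added note) argsort().argsort() turns the overdensities into ranks 0..n-1 within the mass bin; dividing
    # by the maximum rank and subtracting 0.5 maps them onto [-0.5, 0.5]. For example, 5 halos get ranks
    # 0, 1, 2, 3, 4 -> -0.5, -0.25, 0.0, 0.25, 0.5.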
    halos['fenv_rank'] = fenv_rank
    print("finished density rank", time.time() - start)

    # compute delta concentration
    print("computing c rank")
    start = time.time()
    halos_c = halos['r90_L2com']/halos['r25_L2com']
    deltac_rank = np.zeros(len(halos))
    for ibin in range(nbins):
        mmask = (halos['N']*Mpart > mbins[ibin]) & (halos['N']*Mpart < mbins[ibin + 1])
        if np.sum(mmask) > 0:
            if np.sum(mmask) == 1:
                deltac_rank[mmask] = 0
            else:
                new_deltac = halos_c[mmask] - np.median(halos_c[mmask])
                new_deltac_rank = new_deltac.argsort().argsort()
                deltac_rank[mmask] = new_deltac_rank / np.max(new_deltac_rank) - 0.5
    halos['deltac_rank'] = deltac_rank
    print("finished delta c", time.time() - start)

    # the new particle start, len, and multiplier
    halos_pstart = halos['npstartA']
    halos_pnum = halos['npoutA']
    halos_pstart_new = np.zeros(len(halos))
    halos_pnum_new = np.zeros(len(halos))

    # particle arrays for ranks and mask 
    mask_parts = np.zeros(len(parts))
    len_old = len(parts)
    ranks_parts = np.full(len_old, -1.0)
    ranksv_parts = np.full(len_old, -1.0)
    ranksr_parts = np.full(len_old, -1.0)
    ranksp_parts = np.full(len_old, -1.0)
    pos_parts = np.full((len_old, 3), -1.0)
    vel_parts = np.full((len_old, 3), -1.0)
    hvel_parts = np.full((len_old, 3), -1.0)
    Mh_parts = np.full(len_old, -1.0)
    Np_parts = np.full(len_old, -1.0)
    downsample_parts = np.full(len_old, -1.0)
    idh_parts = np.full(len_old, -1.0)
    deltach_parts = np.full(len_old, -1.0)
    fenvh_parts = np.full(len_old, -1.0)

    print("compiling particle subsamples")
    start_tracker = 0
    for j in np.arange(len(halos)):
        if j % 10000 == 0:
            print("halo id", j, end = '\r')
        if mask_halos[j]:
            # updating the mask tagging the particles we want to preserve
            subsample_factor = subsample_particles(halos['N'][j] * Mpart, MT)
            submask = np.random.binomial(n = 1, p = subsample_factor, size = halos_pnum[j])
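            # (added note) np.random.binomial with n=1 returns a 0/1 array, i.e. each of the halos_pnum[j]
            # particles is kept independently with probability subsample_factor; the expected number kept is
            # subsample_factor * halos_pnum[j].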
            # updating the particles' masks, downsample factors, halo mass
            mask_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = submask
            # print(j, halos_pstart, halos_pnum, p_halos, downsample_parts)
            downsample_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = p_halos[j]
            hvel_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = halos['v_L2com'][j]
            Mh_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = halos['N'][j] * Mpart # in msun / h
            Np_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = np.sum(submask)
            idh_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = halos['id'][j]
            deltach_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = deltac_rank[j]
            fenvh_parts[halos_pstart[j]: halos_pstart[j] + halos_pnum[j]] = fenv_rank[j]

            # updating the pstart, pnum, for the halos
            halos_pstart_new[j] = start_tracker
            halos_pnum_new[j] = np.sum(submask)
            start_tracker += np.sum(submask)

            if want_ranks:
                if np.sum(submask) == 0:
                    continue
                # extract particle index
                indices_parts = np.arange(
                    halos_pstart[j], halos_pstart[j] + halos_pnum[j])[submask.astype(bool)]
                indices_parts = indices_parts.astype(int)
                if np.sum(submask) == 1:
                    ranks_parts[indices_parts] = 0
                    ranksv_parts[indices_parts] = 0
                    ranksp_parts[indices_parts] = 0
                    ranksr_parts[indices_parts] = 0
                    continue
                
                # make the rankings
                theseparts = parts[
                    halos_pstart[j]: halos_pstart[j] + halos_pnum[j]][submask.astype(bool)]
                theseparts_pos = theseparts['pos']
                theseparts_vel = theseparts['vel']
                theseparts_halo_pos = halos['x_L2com'][j]
                theseparts_halo_vel = halos['v_L2com'][j]

                dist2_rel = np.sum((theseparts_pos - theseparts_halo_pos)**2, axis = 1)
                newranks = dist2_rel.argsort().argsort() 
                ranks_parts[indices_parts] = (newranks - np.mean(newranks)) / np.mean(newranks)
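                # (added note) with n selected particles the ranks run 0..n-1 with mean (n-1)/2, so
                # (rank - mean)/mean maps them onto [-1, 1]; the same normalization is applied to the velocity,
                # radial-velocity and perihelion ranks below.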

                v2_rel = np.sum((theseparts_vel - theseparts_halo_vel)**2, axis = 1)
                newranksv = v2_rel.argsort().argsort() 
                ranksv_parts[indices_parts] = (newranksv - np.mean(newranksv)) / np.mean(newranksv)

                # get rps
                # calc relative positions
                r_rel = theseparts_pos - theseparts_halo_pos 
                r0 = np.sqrt(np.sum(r_rel**2, axis = 1))
                r_rel_norm = r_rel/r0[:, None]

                # list of peculiar velocities of the particles
                vels_rel = theseparts_vel - theseparts_halo_vel # velocity km/s
                # relative speed to halo center squared
                v_rel2 = np.sum(vels_rel**2, axis = 1) 

                # calculate radial and tangential peculiar velocity
                vel_rad = np.sum(vels_rel*r_rel_norm, axis = 1)
                newranksr = vel_rad.argsort().argsort() 
                ranksr_parts[indices_parts] = (newranksr - np.mean(newranksr)) / np.mean(newranksr)

                # radial component
                v_rad2 = vel_rad**2 # speed
                # tangential component
                v_tan2 = v_rel2 - v_rad2

                # compute the perihelion distance for NFW profile
                m = halos['N'][j]*Mpart / h # in Msun (converted to kg inside alpha below)
                rs = halos['r25_L2com'][j]
                c = halos['r90_L2com'][j]/rs
                r0_kpc = r0*1000 # kpc
                alpha = 1.0/(np.log(1+c)-c/(1+c))*2*6.67e-11*m*2e30/r0_kpc/3.086e+19/1e6
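                # (added note) unit bookkeeping for alpha, assuming r0_kpc is in kpc and m in Msun:
                # G = 6.67e-11 m^3 kg^-1 s^-2, m*2e30 converts Msun -> kg, r0_kpc*3.086e+19 converts kpc -> m,
                # so 2*G*M/r0 comes out in m^2/s^2 and the final /1e6 converts it to (km/s)^2, matching the
                # velocity terms v_tan2 and v_rad2; the 1/(ln(1+c) - c/(1+c)) prefactor is the NFW mass
                # normalization.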

                # iterate a few times to solve for rp
                x2 = v_tan2/(v_tan2+v_rad2)
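                # (added note) a sketch of where the fixed-point iteration below comes from, assuming an NFW
                # potential: with x = rp/r0, angular-momentum conservation gives v_p = v_tan/x and energy
                # conservation gives v_p^2 = v_tan^2 + v_rad^2 + alpha*(ln(1 + x*r0/rs)/x - ln(1 + r0/rs)),
                # so x^2 = v_tan^2 / (v_tan^2 + v_rad^2 + alpha*(ln(1 + x*r0/rs)/x - ln(1 + r0/rs))),
                # iterated a fixed number of times from the guess x^2 = v_tan^2/(v_tan^2 + v_rad^2) above
                # (i.e. no potential-energy change).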

                num_iters = 20 # how many iterations do we want
                factorA = v_tan2 + v_rad2
                factorB = np.log(1+r0_kpc/rs)
                for it in range(num_iters):
                    oldx = np.sqrt(x2)
                    x2 = v_tan2/(factorA + alpha*(np.log(1+oldx*r0_kpc/rs)/oldx - factorB))
                x2[np.isnan(x2)] = 1
                # final perihelion distance 
                rp2 = r0_kpc**2*x2
                newranksp = rp2.argsort().argsort() 
                ranksp_parts[indices_parts] = (newranksp - np.mean(newranksp)) / np.mean(newranksp)

        else:
            halos_pstart_new[j] = -1
            halos_pnum_new[j] = -1

    halos['npstartA'] = halos_pstart_new
    halos['npoutA'] = halos_pnum_new
    halos['randoms'] = np.random.random(len(halos)) # attaching random numbers
    halos['randoms_gaus_vrms'] = np.random.normal(loc = 0, 
        scale = halos["sigmav3d_L2com"]/np.sqrt(3), size = len(halos)) # attaching random numbers
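    # (added note) sigmav3d_L2com is the 3D velocity dispersion; dividing by sqrt(3) gives the 1D dispersion
    # (assuming isotropy), which is used as the Gaussian scatter attached to each halo here.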

    # output halo file 
    print("outputting new halo file ")
    # output_dir = savedir+'/halos_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushodMT_new.h5'
    if os.path.exists(outfilename_halos):
        os.remove(outfilename_halos)
    print(outfilename_halos, outfilename_particles)
    newfile = h5py.File(outfilename_halos, 'w')
    dataset = newfile.create_dataset('halos', data = halos[mask_halos])
    newfile.close()

    # output the new particle file
    print("adding rank fields to particle data ")
    mask_parts = mask_parts.astype(bool)
    parts = parts[mask_parts]
    print("pre process particle number ", len_old, " post process particle number ", len(parts))
    if want_ranks:
        parts['ranks'] = ranks_parts[mask_parts]
        parts['ranksv'] = ranksv_parts[mask_parts]
        parts['ranksr'] = ranksr_parts[mask_parts]
        parts['ranksp'] = ranksp_parts[mask_parts]
    parts['downsample_halo'] = downsample_parts[mask_parts]
    parts['halo_vel'] = hvel_parts[mask_parts]
    parts['halo_mass'] = Mh_parts[mask_parts]
    parts['Np'] = Np_parts[mask_parts]
    parts['halo_id'] = idh_parts[mask_parts]
    parts['randoms'] = np.random.random(len(parts))
    parts['halo_deltac'] = deltach_parts[mask_parts]
    parts['halo_fenv'] = fenvh_parts[mask_parts]

    print("are there any negative particle values? ", np.sum(parts['downsample_halo'] < 0), 
        np.sum(parts['halo_mass'] < 0))
    print("outputting new particle file ")
    # output_dir = savedir+'/particles_xcom_'+str(i)+'_seed'+str(newseed)+'_abacushodMT_new.h5'
    if os.path.exists(outfilename_particles):
        os.remove(outfilename_particles)
    newfile = h5py.File(outfilename_particles, 'w')
    dataset = newfile.create_dataset('particles', data = parts)
    newfile.close()

    print("pre process particle number ", len_old, " post process particle number ", len(parts))