-
Notifications
You must be signed in to change notification settings - Fork 0
/
mcf_analysis_tools.py
152 lines (142 loc) · 7.72 KB
/
mcf_analysis_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import numpy as np
from numpy.lib.recfunctions import append_fields, merge_arrays
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import halotools.mock_observables as mo
import halotools.sim_manager as sm
import scipy.stats as stats
class SpartaCatalog:
    """An ingested Sparta catalog with routines for carrying out the
    calculations needed on that catalog.

    Attributes set by the methods below:
        data: numpy structured array of halo properties.
        lbox: box size handed to the pair counter as the periodic length.
        cores: number of threads handed to the pair counter (fixed at 1).
        binmids_last: log10 of the radial bin centres used by the most recent
            (non-cached) ``calculate_mcf`` call.
        mcf_lowererr, mcf_uppererr: 2nd / 98th percentile of the shuffled-mark
            marked correlation functions, one value per radial bin.
        mcf_rands: array of shape (n radial bins, nrand) with every
            shuffled-mark realisation.
        mcf_<markname>: cached result of ``calculate_mcf`` for that mark.
    """

    # Mean squared and variance of a uniform [0, 1] variable; used to rescale
    # the product-weighted marked correlation function.
    _UNIFORM_MEAN_SQ = .5**2
    _UNIFORM_VARIANCE = 1./12.

    def __init__(self, inpath, minmass, maxmass, masschoice, lbox):
        """Read a halo catalog generated in the sparta format using the tools
        from halotools.

        ARGS:
            inpath (str): path to the input sparta ascii file.
            minmass (float): minimum mass to cut the dataset at in Msun/h
            maxmass (float): maximum mass to cut the dataset at in Msun/h
            masschoice (str): name of the column the mass cuts are applied to.
                Presumably this must be one of the keys of ``rs_dict`` below
                (e.g. 'halo_M200b') -- confirm against the halotools reader.
            lbox (float): the size of the box in Mpc/h for any calculations.
        """
        # Properties we are interested in: name -> (column index, dtype).
        rs_dict = {'halo_id':(1,'i8'), 'halo_upid':(6,'i8'), 'halo_M200b':(10,'f8'), 'halo_R200b':(11,'f8'),
                   'halo_rs':(12,'f8'), 'halo_vmax':(16,'f8'), 'halo_x':(17,'f8'), 'halo_y':(18,'f8'),
                   'halo_z':(19,'f8'), 'halo_spin':(26,'f8'), 'halo_c_to_a':(47,'f8'), 'halo_Acc_Rate_100Myr':(66,'f8'),
                   'halo_Acc_Rate_TDyne':(67,'f8'), 'halo_Acc_Rate_2TDyne':(68,'f8'), 'halo_Rspstatus':(80,'i8'),
                   'halo_upid_mean':(81,'i8'), 'halo_Rsp_mean':(82,'f8'), 'halo_Msp_mean':(84,'f8'),
                   'halo_upid_percentile75':(91,'i8'),'halo_Rsp_percentile75':(92,'f8'), 'halo_Msp_percentile75':(94,'f8'),
                   'halo_upid_percentile87':(96,'i8'),'halo_Rsp_percentile87':(97,'f8'),'halo_Msp_percentile87':(99,'f8')}
        # Start the reader and create the data. Only rows with
        # halo_Rspstatus == 0 are kept, i.e. anything without Sparta
        # information is cut, together with the requested mass window.
        reader = sm.TabularAsciiReader(inpath, rs_dict, row_cut_eq_dict={'halo_Rspstatus':0},
                                       row_cut_min_dict={masschoice:minmass},
                                       row_cut_max_dict={masschoice:maxmass})
        self.data = reader.read_ascii()
        self.lbox = lbox
        self.cores = 1

    def calculate_cV(self, mdefchoice):
        """Calculate cV = vmax / sqrt(G * M / R) for a mass definition choice.

        ARGS:
            mdefchoice (str): the string following M/R in the column names,
                e.g. '200b' reads 'halo_M200b' and 'halo_R200b'.
        RETURN:
            numpy array with one cV value per halo in ``self.data``.
        """
        # NOTE(review): 4.302e-6 looks like G in kpc (km/s)^2 / Msun -- confirm
        # it matches the units of the catalog's mass and radius columns.
        gnewton = 4.302e-6
        mass = self.data['halo_M'+mdefchoice]
        radius = self.data['halo_R'+mdefchoice]
        return self.data['halo_vmax']/np.sqrt(gnewton*mass/radius)

    def add_standard_properties(self):
        """Augment the dataset with a series of standard properties that we
        use in assembly bias calculations.

        Appends 'halo_'-prefixed concentration, size-ratio and mass-ratio
        fields plus an 'err_rands' field of uniform [0, 1) randoms to
        ``self.data`` (the array is replaced by the extended copy).
        """
        data = self.data
        new_fields = [
            # 5 definitions of halo concentration.
            ('halo_cNFW200b', data['halo_R200b']/data['halo_rs']),
            ('halo_cV200b', self.calculate_cV('200b')),
            ('halo_cVsp_mean', self.calculate_cV('sp_mean')),
            ('halo_cVsp_percentile75', self.calculate_cV('sp_percentile75')),
            ('halo_cVsp_percentile87', self.calculate_cV('sp_percentile87')),
            # Plus several halo size ratios.
            ('halo_sizeratiosp87_200b', data['halo_Rsp_percentile87']/data['halo_R200b']),
            ('halo_sizeratiosp75_200b', data['halo_Rsp_percentile75']/data['halo_R200b']),
            ('halo_sizeratiospmean_200b', data['halo_Rsp_mean']/data['halo_R200b']),
            ('halo_sizeratiosp87_spmean', data['halo_Rsp_percentile87']/data['halo_Rsp_mean']),
            # And the same mass ratios.
            ('halo_massratiosp87_200b', data['halo_Msp_percentile87']/data['halo_M200b']),
            ('halo_massratiosp75_200b', data['halo_Msp_percentile75']/data['halo_M200b']),
            ('halo_massratiospmean_200b', data['halo_Msp_mean']/data['halo_M200b']),
            ('halo_massratiosp87_spmean', data['halo_Msp_percentile87']/data['halo_Msp_mean']),
            # And randoms for calculation ease (shuffled in calculate_mcf).
            ('err_rands', np.random.uniform(0,1,len(data))),
        ]
        names = tuple(name for name, _ in new_fields)
        values = tuple(value for _, value in new_fields)
        # Add these all into the data.
        self.data = append_fields(data, names, values, usemask=False)

    @staticmethod
    def _rank_marks_by_mass_bin(mark, logmass, nbins):
        """Return a float copy of `mark` replaced by its fractional rank
        (average rank / bin population) within `nbins` equal-width bins of
        `logmass`. The inputs are not modified."""
        # Work in float so fractional ranks survive integer-typed marks.
        ranked = np.array(mark, dtype=float)
        if ranked.size == 0:
            return ranked
        # nbins bins need nbins + 1 edges. digitize puts values equal to the
        # last edge into an overflow bin, so fold that back into the top bin
        # instead of leaving the most massive halo alone with rank 1.
        edges = np.linspace(logmass.min(), logmass.max(), nbins+1)
        idx = np.clip(np.digitize(logmass, edges), 1, nbins)
        for i in range(1, nbins+1):
            in_bin = idx == i
            count = np.count_nonzero(in_bin)
            if count:
                ranked[in_bin] = stats.rankdata(mark[in_bin], 'average') / count
        return ranked

    def _normalized_marked_tpcf(self, pos, rbins, marks):
        """Product-weighted marked correlation function of `marks`, shifted by
        the uniform mean squared and scaled by the uniform variance."""
        raw = mo.marked_tpcf(pos, rbins, marks1=marks, period=self.lbox,
                             normalize_by='number_counts', weight_func_id=1,
                             num_threads=self.cores)
        return (raw - self._UNIFORM_MEAN_SQ)/self._UNIFORM_VARIANCE

    def calculate_mcf(self, markname, norm=True, normbinning=20, mdefchoice='200b', rbinning=(3,20,10),
                      pidchoice='upid', nrand=200, recalc=False, recalc_rand=False, **kwargs):
        """Calculate the marked correlation function given a mark.

        The result is cached on the instance as ``mcf_<markname>``; a later
        call for the same mark returns the cached array (after printing a
        reminder) unless ``recalc`` is set. Only halos whose
        ``'halo_'+pidchoice`` equals -1 are used.

        ARGS:
            markname (str): mark to use; the field 'halo_'+markname is read.
            norm (bool): whether to replace the marks by their fractional rank
                within bins of log10 mass before correlating.
            normbinning (int): how many mass bins to use in normalization.
            mdefchoice (str): mass definition in which to bin
                ('halo_M'+mdefchoice is read).
            rbinning (sequence): lower limit, upper limit, and number of bins
                (log-spaced) for the marked correlation function.
            pidchoice (str): which parent-id field selects the halos.
            nrand (int): number of shuffled-mark realisations for the errors.
            recalc (bool): recompute even if this mark was calculated before.
            recalc_rand (bool): recompute the shuffled-mark error envelope.
            **kwargs: accepted for call compatibility and ignored.
        RETURN:
            numpy array with the normalized marked correlation function, one
            value per radial bin.

        The error envelope needs the 'err_rands' field, which is only added by
        ``add_standard_properties``. It is recomputed whenever the radial
        binning, ``pidchoice`` or ``nrand`` differ from those it was last
        computed with, so it always matches the current bins.
        NOTE(review): the (x - 0.25) * 12 rescaling assumes marks distributed
        like uniform [0, 1] values; with norm=False that is presumably not
        guaranteed -- confirm this is intended.
        """
        cache_name = 'mcf_'+markname
        if hasattr(self, cache_name) and not recalc:
            print('Previously calculated this mark! Did you mean to include recalc=True?\n')
            return getattr(self, cache_name)

        # Boolean-mask indexing copies, so self.data is never modified below.
        hosts = self.data['halo_'+pidchoice] == -1
        mark = self.data['halo_'+markname][hosts]
        # first optional argument: whether or not to normalize.
        if norm:
            logmass = np.log10(self.data['halo_M'+mdefchoice][hosts])
            mark = self._rank_marks_by_mass_bin(mark, logmass, normbinning)

        # set up binning information: nstep log-spaced bins, nstep+1 edges.
        rmin, rmax, nstep = rbinning
        logbins = np.linspace(np.log10(rmin), np.log10(rmax), num=nstep+1)
        rbins = 10**logbins
        # Bin centres are kept in log10 space.
        self.binmids_last = (logbins[:-1]+logbins[1:])/2.
        pos = np.vstack((self.data['halo_x'][hosts], self.data['halo_y'][hosts],
                         self.data['halo_z'][hosts])).T

        # check if we have calculated errors before with these same settings:
        err_key = (float(rmin), float(rmax), int(nstep), pidchoice, int(nrand))
        have_errors = hasattr(self, 'mcf_lowererr') and getattr(self, '_mcf_err_key', None) == err_key
        if recalc_rand or not have_errors:
            rands = self.data['err_rands'][hosts]
            mcfn_rand = np.zeros((nstep, nrand))
            for i in range(nrand):
                mcfn_rand[:,i] = self._normalized_marked_tpcf(pos, rbins, np.random.permutation(rands))
            self.mcf_uppererr = list(np.percentile(mcfn_rand, 98, axis=1))
            self.mcf_lowererr = list(np.percentile(mcfn_rand, 2, axis=1))
            self.mcf_rands = mcfn_rand
            self._mcf_err_key = err_key

        mcfn_temp = self._normalized_marked_tpcf(pos, rbins, mark)
        setattr(self, cache_name, mcfn_temp)
        return mcfn_temp
class PlotObject:
    """Overlay the marked correlation functions of several catalogs on the
    current matplotlib axes.

    Each catalog must provide ``calculate_mcf`` and, after that call, the
    attributes ``binmids_last``, ``mcf_lowererr`` and ``mcf_uppererr``.
    """

    def __init__(self, *catalogs):
        """Store the catalogs to plot and report how many were given."""
        self.num_catalogs = len(catalogs)
        self.__catlist = list(catalogs)
        print(f'{self.num_catalogs} catalogs ingested for plotting.')

    def plot_mcf(self, haloprop, **kwargs):
        """Plot the marked correlation function of `haloprop` for every
        catalog, with the shuffled-mark error band shaded behind it.

        ARGS:
            haloprop (str): mark name forwarded to ``calculate_mcf``.
            **kwargs: forwarded unchanged to ``calculate_mcf``.
        """
        # Index into the palette modulo its length so that more than nine
        # catalogs reuse colours instead of exhausting an iterator.
        palette = cm.Set1(np.linspace(0,1,9))
        for i, cat in enumerate(self.__catlist):
            mcf = cat.calculate_mcf(haloprop, **kwargs)
            # binmids_last holds log10 bin centres; convert back to radii
            # because semilogx applies the log scaling itself.
            radii = 10**np.asarray(cat.binmids_last)
            c = palette[i % len(palette)]
            plt.semilogx(radii, mcf, c=c, linewidth=3.0)
            plt.fill_between(radii, cat.mcf_lowererr, cat.mcf_uppererr, color=c, alpha=0.05)
        plt.xlabel(r'r')
        plt.ylabel(r'$\mathcal{M}$')