def PlotCovariance(obvs, Mr=21, b_normal=0.25, inference='mcmc'):
    ''' Plot the reduced covariance matrix for a specified obvs and save it
    as a PNG under util.fig_dir().

    Parameters
    ----------
    obvs : (str)
        'xi' or 'gmf'; selects which block of the full covariance matrix is
        plotted.
    Mr, b_normal, inference :
        Forwarded to Data.data_cov and written into the figure file name.

    Raises
    ------
    ValueError
        If obvs is neither 'xi' nor 'gmf'.
    '''
    # Reject unknown observables before doing any work; previously this only
    # surfaced later as an unbound-variable error.
    if obvs not in ('xi', 'gmf'):
        raise ValueError("obvs must be 'xi' or 'gmf', got %r" % (obvs,))

    # import the covariance matrix
    covar = Data.data_cov(Mr=Mr, b_normal=b_normal, inference=inference)

    if obvs == 'xi':
        obvs_cov = covar[1:16, 1:16]
        r_bin = Data.xi_binedges()
    else:
        # NOTE(review): this block starts at index 17 whereas plot_data reads
        # the gmf errors from index 16 onward -- confirm the offset is
        # intentional (it leaves one fewer cell than there are bin centres,
        # which is what pcolormesh expects when r_bin is used as cell edges).
        obvs_cov = covar[17:, 17:]
        binedges = Data.data_gmf_bins()
        r_bin = 0.5 * (binedges[:-1] + binedges[1:])

    # reduced covariance for plotting: C_ij / sqrt(C_ii * C_jj)
    variances = np.diag(obvs_cov)
    red_covar = obvs_cov / np.sqrt(np.outer(variances, variances))

    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    cont = sub.pcolormesh(r_bin, r_bin, red_covar, cmap=plt.cm.afmhot_r)
    plt.colorbar(cont)

    sub.set_xlim([r_bin[0], r_bin[-1]])
    sub.set_ylim([r_bin[0], r_bin[-1]])
    sub.set_xscale('log')
    sub.set_yscale('log')

    sub.set_xlabel(r'$\mathtt{r}\;[\mathtt{Mpc/h}$]', fontsize=25)
    sub.set_ylabel(r'$\mathtt{r}\;[\mathtt{Mpc/h}$]', fontsize=25)

    fig_file = ''.join([util.fig_dir(),
        obvs.upper(), 'covariance',
        '.Mr', str(Mr),
        '.bnorm', str(round(b_normal, 2)),
        '.', inference, '_inf.png'])
    fig.savefig(fig_file, bbox_inches='tight')
    plt.close()
    return None
def plot_data(Mr=21, output_dir=None):
    ''' Plot the summary statistics of the data.

    The left panel shows xi(r), the right panel the group multiplicity
    function; error bars are the square roots of the diagonal of the data
    covariance (entries 1-15 for xi, 16 onward for the gmf). The figure is
    saved as a PDF whose name contains Mr.

    Parameters
    ----------
    Mr : only used in the output file name
    output_dir : directory prefix for the figure; util.fig_dir() when None
    '''
    fig_dir = util.fig_dir() if output_dir is None else output_dir

    variances = np.diag(Data.data_cov())
    xi_err = np.sqrt(variances[1:16])
    gmf_err = np.sqrt(variances[16:])

    fig, (ax_xi, ax_gmf) = plt.subplots(1, 2, figsize=(10, 5))
    fig.subplots_adjust(wspace=0.4, hspace=0.4)

    # left panel: two-point correlation function
    r_edges = Data.xi_binedges()
    xi_data = Data.data_xi()
    r_mid = 0.5 * (r_edges[:-1] + r_edges[1:])
    ax_xi.errorbar(r_mid, xi_data, yerr=xi_err, fmt=".k", capsize=0)
    ax_xi.set_yscale('log')
    ax_xi.set_xscale('log')
    ax_xi.set_xlim(0.2, 22)
    ax_xi.set_xlabel(r'$r[\mathrm{Mpc}/h]$')
    ax_xi.set_ylabel(r'$\xi(r)$')

    # right panel: group multiplicity function
    n_edges = Data.gmf_bins()
    gmf_data = Data.data_gmf()
    n_mid = 0.5 * (n_edges[:-1] + n_edges[1:])
    ax_gmf.errorbar(n_mid, gmf_data, yerr=gmf_err, fmt=".k", capsize=0)
    ax_gmf.set_xlim(1, 20)
    ax_gmf.set_yscale('log')
    ax_gmf.set_xlabel(r'Group Richness $N$')
    ax_gmf.set_ylabel(r'$\zeta(N)$')

    fig_file = fig_dir + "data" + '_Mr' + str(Mr) + '.pdf'
    plt.savefig(fig_file)
    plt.close()
def plot_data(Mr=21, output_dir=None):
    ''' Plot summary statistics of the data: xi(r) and the group
    multiplicity function side by side, each with error bars taken from the
    diagonal of the data covariance, and save the figure as a PDF.

    Parameters
    ----------
    Mr : appears only in the name of the saved file
    output_dir : prefix of the saved file; defaults to util.fig_dir()
    '''
    def _midpoints(edges):
        # centres of consecutive bin edges
        return 0.5 * (edges[:-1] + edges[1:])

    if output_dir is not None:
        out_prefix = output_dir
    else:
        out_prefix = util.fig_dir()

    cov_diag = np.diag(Data.data_cov())
    err_xi = np.sqrt(cov_diag[1:16])      # entries 1..15 belong to xi
    err_gmf = np.sqrt(cov_diag[16:])      # everything after that to the gmf

    fig, panels = plt.subplots(1, 2, figsize=(10, 5))
    fig.subplots_adjust(wspace=0.4, hspace=0.4)

    # xi(r)
    panel = panels[0]
    edges = Data.xi_binedges()
    values = Data.data_xi()
    panel.errorbar(_midpoints(edges), values, yerr=err_xi,
                   fmt=".k", capsize=0)
    panel.set_yscale('log')
    panel.set_xscale('log')
    panel.set_xlim(0.2, 22)
    panel.set_xlabel(r'$r[\mathrm{Mpc}/h]$')
    panel.set_ylabel(r'$\xi(r)$')

    # group multiplicity function
    panel = panels[1]
    edges = Data.gmf_bins()
    values = Data.data_gmf()
    panel.errorbar(_midpoints(edges), values, yerr=err_gmf,
                   fmt=".k", capsize=0)
    panel.set_xlim(1, 20)
    panel.set_yscale('log')
    panel.set_xlabel(r'Group Richness $N$')
    panel.set_ylabel(r'$\zeta(N)$')

    name_parts = [out_prefix, "data", '_Mr', str(Mr), '.pdf']
    plt.savefig(''.join(name_parts))
    plt.close()
def Subvolume_FullvolumeCut(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 2PCF for
    the entire MultiDark volume WITHOUT periodic boundary conditions and
    actual pair counts, CUT into subvolumes of the same size *AFTER* populate
    mock

    Three quantities are computed (or read back from a pickle cache whose
    name depends on N_sub): xi of the full mock, the average xi of the first
    N_sub subvolumes cut out of that same mock, and the average xi of N_sub
    mocks populated with a subvolume masking function.

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample
    ratio : (bool)
        If True, plot the averaged subvolume xi divided by the full-volume
        xi instead of the xi curves themselves.
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/',
        'xi_subvolume_fullvolume_cut_test',
        '.Nsub', str(N_sub),
        '.p'])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    if os.path.isfile(pickle_file):
        # Everything that gets plotted is in the cache, so no mock has to be
        # populated. NOTE: the pickle is this function's own dump; do not
        # point pickle_file at data from an untrusted source.
        with open(pickle_file, 'rb') as f_in:
            data_dump = pickle.load(f_in)
        full_xi = data_dump['full_xi']
        fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg']
        sub_xi_avg = data_dump['Natural']['sub_xi_avg']
    else:
        rmax = xi_bin.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

        sub_RR = data_RR(box='md_sub')
        sub_randoms = data_random(box='md_sub')
        sub_NR = len(sub_randoms)

        # Entire MultiDark Volume (No Periodic Boundary Conditions).
        # The mock is populated exactly once: subvol_id and full_pos must
        # describe the same galaxies, otherwise the cuts below select the
        # wrong rows.
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                    halo_finder='rockstar')
        model.populate_mock(halocat, enforce_PBC=False)
        subvol_id = util.mk_id_column(table=model.mock.galaxy_table)
        full_pos = three_dim_pos_bundle(model.mock.galaxy_table,
                                        'x', 'y', 'z')

        # Full Volume
        full_randoms = data_random(box='md_all')
        full_RR = data_RR(box='md_all')
        full_NR = len(full_randoms)
        full_xi = tpcf(
            full_pos, xi_bin,
            randoms=full_randoms,
            period=None,
            do_auto=True,
            do_cross=False,
            num_threads=5,
            max_sample_size=int(full_pos.shape[0]),
            estimator='Natural',
            approx_cell1_size=approx_cell1_size,
            approx_cellran_size=approx_cellran_size,
            RR_precomputed=full_RR,
            NR_precomputed=full_NR)
        data_dump = {'full_xi': full_xi}

        # Full volume cut into subvolumes after populating
        fullcut_xi_list = []
        fullcut_xi_tot = np.zeros(len(xi_bin) - 1)
        for vol_id in np.unique(subvol_id)[:N_sub]:
            print('Subvolume  %s' % vol_id)
            in_cut = np.where(subvol_id == vol_id)
            fullcut_pos = full_pos[in_cut]
            fullcut_xi = tpcf(
                fullcut_pos, xi_bin,
                randoms=sub_randoms,
                period=None,
                do_auto=True,
                do_cross=False,
                num_threads=5,
                max_sample_size=int(fullcut_pos.shape[0]),
                estimator='Natural',
                approx_cell1_size=approx_cell1_size,
                approx_cellran_size=approx_cellran_size,
                RR_precomputed=sub_RR,
                NR_precomputed=sub_NR)
            fullcut_xi_list.append(fullcut_xi)
            fullcut_xi_tot += fullcut_xi
        # average over the subvolumes actually measured (there may be fewer
        # distinct ids than N_sub)
        fullcut_xi_avg = fullcut_xi_tot / float(max(len(fullcut_xi_list), 1))
        data_dump['fullcut_xi'] = {
            'fullcut_xi_list': fullcut_xi_list,
            'fullcut_xi_avg': fullcut_xi_avg}

        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                        halo_finder='rockstar')
        sub_xis_list = []
        sub_xis = np.zeros(len(full_xi))
        # subvolumes 1..N_sub inclusive, so the N_sub in the average below
        # matches the number of terms in the sum
        for ii in range(1, N_sub + 1):
            print('Subvolume  %s' % ii)
            simsubvol = lambda x: util.mask_func(x, ii)
            sub_model.populate_mock(sub_halocat,
                                    masking_function=simsubvol,
                                    enforce_PBC=False)
            pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                       'x', 'y', 'z')

            # shift a copy of the randoms by the offsets returned for
            # subvolume ii
            xi, yi, zi = util.random_shifter(ii)
            temp_randoms = sub_randoms.copy()
            temp_randoms[:, 0] += xi
            temp_randoms[:, 1] += yi
            temp_randoms[:, 2] += zi

            sub_xi = tpcf(
                pos, xi_bin,
                randoms=temp_randoms,
                period=None,
                do_auto=True,
                do_cross=False,
                num_threads=5,
                max_sample_size=int(pos.shape[0]),
                estimator='Natural',
                approx_cell1_size=approx_cell1_size,
                approx_cellran_size=approx_cellran_size,
                RR_precomputed=sub_RR,
                NR_precomputed=sub_NR)
            sub_xis += sub_xi
            sub_xis_list.append(sub_xi)
        sub_xi_avg = sub_xis / float(N_sub)
        data_dump['Natural'] = {
            'sub_xi_avg': sub_xi_avg,
            'sub_xis_list': sub_xis_list}

        with open(pickle_file, 'wb') as f_out:
            pickle.dump(data_dump, f_out)

    if not ratio:
        sub.plot(r_mid, full_xi,
                 lw=2, ls='-', c='k', label=r'Full Volume')
        sub.plot(r_mid, fullcut_xi_avg,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')
        sub.plot(r_mid, sub_xi_avg,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')
    else:
        sub.plot(r_mid, fullcut_xi_avg / full_xi,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')
        sub.plot(r_mid, sub_xi_avg / full_xi,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_fullvolume_cut.Nsub',
                            str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_fullvolume_cut.Nsub',
                            str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
def Subvolume_Analytic(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the
    analytic 2PCF for the entire MultiDark volume

    For each of the 'Landy-Szalay' and 'Natural' estimators the xi of
    subvolumes 1..N_sub is averaged and compared with the periodic-box xi of
    the full mock. Results are cached in a pickle file whose name depends on
    N_sub and are read back instead of recomputed when that file exists.

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample
    ratio : (bool)
        If True, plot the averaged subvolume xi divided by the full-volume
        xi instead of the xi curves themselves.
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/',
        'xi_subvolume_test',
        '.Nsub', str(N_sub),
        '.p'])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    # Decide once whether the cache is used, so the run cannot switch between
    # cached and freshly computed branches half way through.
    cached = os.path.isfile(pickle_file)

    if cached:
        # NOTE: the pickle is this function's own dump; do not point
        # pickle_file at data from an untrusted source.
        with open(pickle_file, 'rb') as f_in:
            data_dump = pickle.load(f_in)
        full_xi = data_dump['full_xi']
    else:
        # Entire MultiDark Volume (Analytic xi)
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                    halo_finder='rockstar')
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # NOTE(review): no randoms are passed here (analytic randoms), so DR
        # pairs cannot be counted; although 'Landy-Szalay' is requested the
        # estimator presumably reduces to the Natural one -- confirm against
        # the halotools implementation.
        full_xi = tpcf(pos, xi_bin,
                       period=model.mock.Lbox,
                       max_sample_size=int(2e5),
                       estimator='Landy-Szalay',
                       num_threads=1)
        data_dump = {'full_xi': full_xi}

        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                                        halo_finder='rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)

        rmax = xi_bin.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

    if not ratio:
        sub.plot(r_mid, full_xi,
                 lw=2, ls='-', c='k', label=r'Analytic $\xi$ Entire Volume')

    # (estimator, index into pretty_colors)
    for method, iii in (('Landy-Szalay', 3), ('Natural', 5)):
        if cached:
            sub_xi_avg = data_dump[method]['sub_xi_avg']
        else:
            sub_xis_list = []
            sub_xis = np.zeros(len(full_xi))

            for ii in range(1, N_sub + 1):
                simsubvol = lambda x: util.mask_func(x, ii)
                sub_model.populate_mock(sub_halocat,
                                        masking_function=simsubvol,
                                        enforce_PBC=False)
                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                           'x', 'y', 'z')

                # shift a copy of the randoms by the offsets returned for
                # subvolume ii
                xi, yi, zi = util.random_shifter(ii)
                temp_randoms = randoms.copy()
                temp_randoms[:, 0] += xi
                temp_randoms[:, 1] += yi
                temp_randoms[:, 2] += zi

                sub_xi = tpcf(
                    pos, xi_bin, pos,
                    randoms=temp_randoms,
                    period=None,
                    max_sample_size=int(1e5),
                    estimator=method,
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=RR,
                    NR_precomputed=NR)
                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            sub_xi_avg = sub_xis / float(N_sub)
            data_dump[method] = {
                'sub_xi_avg': sub_xi_avg,
                'sub_xis_list': sub_xis_list}

        if not ratio:
            sub.plot(r_mid, sub_xi_avg,
                     lw=2, ls='--', c=pretty_colors[iii],
                     label='Subvolume ' + method)
        else:
            sub.plot(r_mid, sub_xi_avg / full_xi,
                     lw=2, ls='--', c=pretty_colors[iii],
                     label='Subvolume ' + method)

    # written only after every estimator has been stored in data_dump
    if not cached:
        with open(pickle_file, 'wb') as f_out:
            pickle.dump(data_dump, f_out)

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_analytic.Nsub',
                            str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(),
                            'test_xi_subvolume_analytic.Nsub',
                            str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
    ''' Given theta, sum_stat calculates the observables from our forward
    model

    Parameters
    ----------
    theta : (self explanatory)
        Five values written to the model's param_dict as logM0,
        log(sigma_logM), logMmin, alpha, logM1 (theta[1] is exponentiated).
    prior_range : If specified, checks to make sure that theta is within
        the prior range. Outside the range, or when the forward model raises
        ValueError, fixed placeholder observables are returned instead.
    observables : names of the statistics to compute, any of 'nbar', 'gmf',
        'xi'; the returned list follows this order.

    Raises
    ------
    NotImplementedError
        If an unknown observable is requested and the forward model is run.
    '''
    self.model.param_dict['logM0'] = theta[0]
    self.model.param_dict['sigma_logM'] = np.exp(theta[1])
    self.model.param_dict['logMmin'] = theta[2]
    self.model.param_dict['alpha'] = theta[3]
    self.model.param_dict['logM1'] = theta[4]

    box_volume = 1000.**3.

    def _measure(pos, err_msg):
        # observables of one populated mock, in the requested order
        obvs = []
        for obv in observables:
            if obv == 'nbar':
                obvs.append(len(pos) / box_volume)   # nbar of the galaxy catalog
            elif obv == 'gmf':
                nbar = len(pos) / box_volume
                b = self.b_normal * (nbar)**(-1. / 3)
                groups = pyfof.friends_of_friends(pos, b)
                w = np.array([len(x) for x in groups])
                gbins = data_gmf_bins()
                gmf = np.histogram(w, gbins)[0] / box_volume
                obvs.append(gmf)
            elif obv == 'xi':
                # bin edges are only needed (and only computed) for xi
                rbins = xi_binedges()
                rmax = rbins.max()
                # NOTE(review): `randoms` is not assigned anywhere in this
                # function, so it has to resolve at module scope -- confirm
                # it should not be an attribute of self like RR and NR.
                greek_xi = tpcf(
                    pos, rbins, pos,
                    randoms=randoms,
                    period=None,
                    max_sample_size=int(3e5),
                    estimator='Natural',
                    approx_cell1_size=[rmax, rmax, rmax],
                    approx_cellran_size=[rmax, rmax, rmax],
                    RR_precomputed=self.RR,
                    NR_precomputed=self.NR)
                obvs.append(greek_xi)
            else:
                raise NotImplementedError(err_msg)
        return obvs

    def _placeholder():
        # fixed values returned when theta is rejected; unknown observable
        # names are skipped silently here
        obvs = []
        for obv in observables:
            if obv == 'nbar':
                obvs.append(10.)
            elif obv == 'gmf':
                bins = data_gmf_bins()
                obvs.append(np.ones_like(bins)[:-1] * 1000.)
            elif obv == 'xi':
                obvs.append(np.zeros(len(xi_binedges()[:-1])))
        return obvs

    if prior_range is None:
        self.model.populate_mock(self.halocat, enforce_PBC=False)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        return _measure(pos, 'Only nbar 2pcf, gmf implemented so far')

    in_prior = np.all((prior_range[:, 0] < theta) &
                      (theta < prior_range[:, 1]))
    if not in_prior:
        return _placeholder()

    # if all theta_i is within prior range ...
    try:
        # NOTE(review): unlike the prior_range=None path this call does not
        # pass enforce_PBC=False -- confirm the difference is intended.
        self.model.populate_mock(self.halocat)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        return _measure(
            pos, 'Only nbar, tpcf, and gmf are implemented so far')
    except ValueError:
        return _placeholder()
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
    ''' Given theta, sum_stat calculates the observables from our forward
    model

    Parameters
    ----------
    theta : (self explanatory)
        Five values written to the model's param_dict as logM0,
        log(sigma_logM), logMmin, alpha, logM1 (theta[1] is exponentiated).
    prior_range : If specified, checks to make sure that theta is within
        the prior range. Outside the range, or when the forward model raises
        ValueError, fixed placeholder observables are returned instead.
    observables : names of the statistics to compute, any of 'nbar', 'gmf',
        'xi'; the returned list follows this order.

    Raises
    ------
    NotImplementedError
        If an unknown observable is requested and the forward model is run.
    '''
    self.model.param_dict['logM0'] = theta[0]
    self.model.param_dict['sigma_logM'] = np.exp(theta[1])
    self.model.param_dict['logMmin'] = theta[2]
    self.model.param_dict['alpha'] = theta[3]
    self.model.param_dict['logM1'] = theta[4]

    box_volume = 1000.**3.

    def _forward(err_msg):
        # populate the mock and measure the requested observables
        self.model.populate_mock(self.halocat)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        obvs = []
        for obv in observables:
            if obv == 'nbar':
                obvs.append(len(pos) / box_volume)   # nbar of the galaxy catalog
            elif obv == 'gmf':
                nbar = len(pos) / box_volume
                b = self.b_normal * (nbar)**(-1. / 3)
                groups = pyfof.friends_of_friends(pos, b)
                w = np.array([len(x) for x in groups])
                gbins = data_gmf_bins()
                gmf = np.histogram(w, gbins)[0] / box_volume
                obvs.append(gmf)
            elif obv == 'xi':
                # bin edges are only needed (and only computed) for xi
                rbins = xi_binedges()
                rmax = rbins.max()
                greek_xi = tpcf(
                    pos, rbins,
                    period=self.model.mock.Lbox,
                    max_sample_size=int(3e5),
                    estimator='Natural',
                    approx_cell1_size=[rmax, rmax, rmax])
                obvs.append(greek_xi)
            else:
                raise NotImplementedError(err_msg)
        return obvs

    def _placeholder():
        # fixed values returned when theta is rejected; unknown observable
        # names are skipped silently here
        obvs = []
        for obv in observables:
            if obv == 'nbar':
                obvs.append(10.)
            elif obv == 'gmf':
                bins = data_gmf_bins()
                obvs.append(np.ones_like(bins)[:-1] * 1000.)
            elif obv == 'xi':
                obvs.append(np.zeros(len(xi_binedges()[:-1])))
        return obvs

    if prior_range is None:
        return _forward('Only nbar 2pcf, gmf implemented so far')

    in_prior = np.all((prior_range[:, 0] < theta) &
                      (theta < prior_range[:, 1]))
    if not in_prior:
        return _placeholder()

    # if all theta_i is within prior range ...
    try:
        return _forward('Only nbar, tpcf, and gmf are implemented so far')
    except ValueError:
        return _placeholder()
def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
    ''' Given theta, sum_stat calculates the observables from our forward
    model, measured in one randomly chosen subvolume of the mock.

    Parameters
    ----------
    theta : (self explanatory)
        Five values written to the model's param_dict as logM0,
        log(sigma_logM), logMmin, alpha, logM1 (theta[1] is exponentiated).
    prior_range : If specified, checks to make sure that theta is within
        the prior range. Outside the range, or when the forward model raises
        ValueError, fixed placeholder observables are returned instead.
    observables : names of the statistics to compute, any of 'nbar', 'gmf',
        'xi'; the returned list follows this order.

    Raises
    ------
    NotImplementedError
        If an unknown observable is requested and the forward model is run.
    '''
    self.model.param_dict['logM0'] = theta[0]
    self.model.param_dict['sigma_logM'] = np.exp(theta[1])
    self.model.param_dict['logMmin'] = theta[2]
    self.model.param_dict['alpha'] = theta[3]
    self.model.param_dict['logM1'] = theta[4]

    # normalisation used for nbar and the gmf
    # NOTE(review): presumably the volume of one subvolume -- confirm
    subvol_volume = 200.**3.

    def _measure(pos, rint, err_msg):
        # observables of the masked galaxy positions, in the requested order
        obvs = []
        for obv in observables:
            if obv == 'nbar':
                obvs.append(len(pos) / subvol_volume)   # nbar of the galaxy catalog
            elif obv == 'gmf':
                # compute group richness
                nbar = len(pos) / subvol_volume
                b = self.b_normal * (nbar)**(-1. / 3)
                groups = pyfof.friends_of_friends(pos, b)
                w = np.array([len(x) for x in groups])
                gbins = data_gmf_bins()
                gmf = np.histogram(w, gbins)[0] / subvol_volume
                obvs.append(gmf)
            elif obv == 'xi':
                # Bin edges and the shifted copy of the randoms are only
                # needed for xi, so they are built here rather than on every
                # call (the default observables do not include xi).
                rbins = xi_binedges()
                rmax = rbins.max()
                xi, yi, zi = util.random_shifter(rint)
                temp_randoms = self.randoms.copy()
                temp_randoms[:, 0] += xi
                temp_randoms[:, 1] += yi
                temp_randoms[:, 2] += zi
                greek_xi = tpcf(
                    pos, rbins, pos,
                    randoms=temp_randoms,
                    period=None,
                    max_sample_size=int(1e5),
                    estimator='Natural',
                    approx_cell1_size=[rmax, rmax, rmax],
                    approx_cellran_size=[rmax, rmax, rmax],
                    RR_precomputed=self.RR,
                    NR_precomputed=self.NR)
                obvs.append(greek_xi)
            else:
                raise NotImplementedError(err_msg)
        return obvs

    def _placeholder():
        # fixed values returned when theta is rejected; unknown observable
        # names are skipped silently here
        obvs = []
        for obv in observables:
            if obv == 'nbar':
                obvs.append(10.)
            elif obv == 'gmf':
                bins = data_gmf_bins()
                obvs.append(np.ones_like(bins)[:-1] * 1000.)
            elif obv == 'xi':
                obvs.append(np.zeros(len(xi_binedges()[:-1])))
        return obvs

    if prior_range is None:
        # np.random.randint excludes the upper bound, so rint is in 1..124
        rint = np.random.randint(1, 125)
        # the whole box is populated here and the subvolume is selected
        # afterwards with mask_galaxy_table
        self.model.populate_mock(self.halocat)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        pos = util.mask_galaxy_table(pos, rint)
        return _measure(pos, rint, 'Only nbar 2pcf, gmf implemented so far')

    in_prior = np.all((prior_range[:, 0] < theta) &
                      (theta < prior_range[:, 1]))
    if not in_prior:
        return _placeholder()

    # if all theta_i is within prior range ...
    try:
        rint = np.random.randint(1, 125)
        simsubvol = lambda x: util.mask_func(x, rint)
        self.model.populate_mock(self.halocat,
                                 masking_function=simsubvol,
                                 enforce_PBC=False)
        pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                   'x', 'y', 'z')
        # imposing mask on the galaxy table
        pos = util.mask_galaxy_table(pos, rint)
        return _measure(
            pos, rint, 'Only nbar, tpcf, and gmf are implemented so far')
    except ValueError:
        return _placeholder()
def Subvolume_FullvolumeCut(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 2PCF for
    the entire MultiDark volume WITHOUT periodic boundary conditions and
    actual pair counts, CUT into subvolumes of the same size *AFTER* populate
    mock

    Three quantities are computed (or read back from a pickle cache whose
    name depends on N_sub): xi of the full mock, the average xi of the first
    N_sub subvolumes cut out of that same mock, and the average xi of N_sub
    mocks populated with a subvolume masking function.

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample
    ratio : (bool)
        If True, plot the averaged subvolume xi divided by the full-volume
        xi instead of the xi curves themselves.
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/',
        'xi_subvolume_fullvolume_cut_test', '.Nsub',
        str(N_sub), '.p'
    ])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    if os.path.isfile(pickle_file):
        # Everything that gets plotted is in the cache, so no mock has to be
        # populated. NOTE: the pickle is this function's own dump; do not
        # point pickle_file at data from an untrusted source.
        with open(pickle_file, 'rb') as cache:
            data_dump = pickle.load(cache)
        full_xi = data_dump['full_xi']
        fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg']
        sub_xi_avg = data_dump['Natural']['sub_xi_avg']
    else:
        rmax = xi_bin.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

        sub_RR = data_RR(box='md_sub')
        sub_randoms = data_random(box='md_sub')
        sub_NR = len(sub_randoms)

        # Entire MultiDark Volume (No Periodic Boundary Conditions).
        # The mock is populated exactly once: subvol_id and full_pos must
        # describe the same galaxies, otherwise the cuts below select the
        # wrong rows.
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark',
                                    redshift=0,
                                    halo_finder='rockstar')
        model.populate_mock(halocat, enforce_PBC=False)
        subvol_id = util.mk_id_column(table=model.mock.galaxy_table)
        full_pos = three_dim_pos_bundle(model.mock.galaxy_table,
                                        'x', 'y', 'z')

        # Full Volume
        full_randoms = data_random(box='md_all')
        full_RR = data_RR(box='md_all')
        full_NR = len(full_randoms)
        full_xi = tpcf(full_pos,
                       xi_bin,
                       randoms=full_randoms,
                       period=None,
                       do_auto=True,
                       do_cross=False,
                       num_threads=5,
                       max_sample_size=int(full_pos.shape[0]),
                       estimator='Natural',
                       approx_cell1_size=approx_cell1_size,
                       approx_cellran_size=approx_cellran_size,
                       RR_precomputed=full_RR,
                       NR_precomputed=full_NR)
        data_dump = {'full_xi': full_xi}

        # Full volume cut into subvolumes after populating
        fullcut_xi_list = []
        fullcut_xi_tot = np.zeros(len(xi_bin) - 1)
        for vol_id in np.unique(subvol_id)[:N_sub]:
            print('Subvolume  %s' % vol_id)
            in_cut = np.where(subvol_id == vol_id)
            fullcut_pos = full_pos[in_cut]
            fullcut_xi = tpcf(fullcut_pos,
                              xi_bin,
                              randoms=sub_randoms,
                              period=None,
                              do_auto=True,
                              do_cross=False,
                              num_threads=5,
                              max_sample_size=int(fullcut_pos.shape[0]),
                              estimator='Natural',
                              approx_cell1_size=approx_cell1_size,
                              approx_cellran_size=approx_cellran_size,
                              RR_precomputed=sub_RR,
                              NR_precomputed=sub_NR)
            fullcut_xi_list.append(fullcut_xi)
            fullcut_xi_tot += fullcut_xi
        # average over the subvolumes actually measured (there may be fewer
        # distinct ids than N_sub)
        fullcut_xi_avg = fullcut_xi_tot / float(max(len(fullcut_xi_list), 1))
        data_dump['fullcut_xi'] = {
            'fullcut_xi_list': fullcut_xi_list,
            'fullcut_xi_avg': fullcut_xi_avg,
        }

        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark',
                                        redshift=0,
                                        halo_finder='rockstar')
        sub_xis_list = []
        sub_xis = np.zeros(len(full_xi))
        # subvolumes 1..N_sub inclusive, so the N_sub in the average below
        # matches the number of terms in the sum
        for ii in range(1, N_sub + 1):
            print('Subvolume  %s' % ii)
            simsubvol = lambda x: util.mask_func(x, ii)
            sub_model.populate_mock(sub_halocat,
                                    masking_function=simsubvol,
                                    enforce_PBC=False)
            pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                       'x', 'y', 'z')

            # shift a copy of the randoms by the offsets returned for
            # subvolume ii
            xi, yi, zi = util.random_shifter(ii)
            temp_randoms = sub_randoms.copy()
            temp_randoms[:, 0] += xi
            temp_randoms[:, 1] += yi
            temp_randoms[:, 2] += zi

            sub_xi = tpcf(pos,
                          xi_bin,
                          randoms=temp_randoms,
                          period=None,
                          do_auto=True,
                          do_cross=False,
                          num_threads=5,
                          max_sample_size=int(pos.shape[0]),
                          estimator='Natural',
                          approx_cell1_size=approx_cell1_size,
                          approx_cellran_size=approx_cellran_size,
                          RR_precomputed=sub_RR,
                          NR_precomputed=sub_NR)
            sub_xis += sub_xi
            sub_xis_list.append(sub_xi)
        sub_xi_avg = sub_xis / float(N_sub)
        data_dump['Natural'] = {
            'sub_xi_avg': sub_xi_avg,
            'sub_xis_list': sub_xis_list,
        }

        with open(pickle_file, 'wb') as cache:
            pickle.dump(data_dump, cache)

    if not ratio:
        sub.plot(r_mid, full_xi,
                 lw=2, ls='-', c='k', label=r'Full Volume')
        sub.plot(r_mid, fullcut_xi_avg,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')
        sub.plot(r_mid, sub_xi_avg,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')
    else:
        sub.plot(r_mid, fullcut_xi_avg / full_xi,
                 lw=2, ls='-', c='k', label=r'Full Volume Cut Average')
        sub.plot(r_mid, sub_xi_avg / full_xi,
                 lw=2, ls='--', c=pretty_colors[3], label='Subvolume')

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub',
            str(N_sub), '.ratio.png'
        ])
    else:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub',
            str(N_sub), '.png'
        ])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
def Subvolume_Analytic(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the
    analytic 2PCF for the entire MultiDark volume

    For each of the 'Landy-Szalay' and 'Natural' estimators the xi of
    subvolumes 1..N_sub is averaged and compared with the periodic-box xi of
    the full mock. Results are cached in a pickle file whose name depends on
    N_sub and are read back instead of recomputed when that file exists.

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample
    ratio : (bool)
        If True, plot the averaged subvolume xi divided by the full-volume
        xi instead of the xi curves themselves.
    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_test', '.Nsub',
        str(N_sub), '.p'
    ])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()
    r_mid = 0.5 * (xi_bin[:-1] + xi_bin[1:])

    # Decide once whether the cache is used, so the run cannot switch between
    # cached and freshly computed branches half way through.
    cached = os.path.isfile(pickle_file)

    if cached:
        # NOTE: the pickle is this function's own dump; do not point
        # pickle_file at data from an untrusted source.
        with open(pickle_file, 'rb') as cache:
            data_dump = pickle.load(cache)
        full_xi = data_dump['full_xi']
    else:
        # Entire MultiDark Volume (Analytic xi)
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark',
                                    redshift=0,
                                    halo_finder='rockstar')
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # NOTE(review): no randoms are passed here (analytic randoms), so DR
        # pairs cannot be counted; although 'Landy-Szalay' is requested the
        # estimator presumably reduces to the Natural one -- confirm against
        # the halotools implementation.
        full_xi = tpcf(pos,
                       xi_bin,
                       period=model.mock.Lbox,
                       max_sample_size=int(2e5),
                       estimator='Landy-Szalay',
                       num_threads=1)
        data_dump = {'full_xi': full_xi}

        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark',
                                        redshift=0,
                                        halo_finder='rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)

        rmax = xi_bin.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

    if not ratio:
        sub.plot(r_mid, full_xi,
                 lw=2, ls='-', c='k', label=r'Analytic $\xi$ Entire Volume')

    # (estimator, index into pretty_colors)
    for method, iii in (('Landy-Szalay', 3), ('Natural', 5)):
        if cached:
            sub_xi_avg = data_dump[method]['sub_xi_avg']
        else:
            sub_xis_list = []
            sub_xis = np.zeros(len(full_xi))

            for ii in range(1, N_sub + 1):
                simsubvol = lambda x: util.mask_func(x, ii)
                sub_model.populate_mock(sub_halocat,
                                        masking_function=simsubvol,
                                        enforce_PBC=False)
                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table,
                                           'x', 'y', 'z')

                # shift a copy of the randoms by the offsets returned for
                # subvolume ii
                xi, yi, zi = util.random_shifter(ii)
                temp_randoms = randoms.copy()
                temp_randoms[:, 0] += xi
                temp_randoms[:, 1] += yi
                temp_randoms[:, 2] += zi

                sub_xi = tpcf(pos,
                              xi_bin,
                              pos,
                              randoms=temp_randoms,
                              period=None,
                              max_sample_size=int(1e5),
                              estimator=method,
                              approx_cell1_size=approx_cell1_size,
                              approx_cellran_size=approx_cellran_size,
                              RR_precomputed=RR,
                              NR_precomputed=NR)
                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            sub_xi_avg = sub_xis / float(N_sub)
            data_dump[method] = {
                'sub_xi_avg': sub_xi_avg,
                'sub_xis_list': sub_xis_list,
            }

        if not ratio:
            sub.plot(r_mid, sub_xi_avg,
                     lw=2, ls='--', c=pretty_colors[iii],
                     label='Subvolume ' + method)
        else:
            sub.plot(r_mid, sub_xi_avg / full_xi,
                     lw=2, ls='--', c=pretty_colors[iii],
                     label='Subvolume ' + method)

    # written only after every estimator has been stored in data_dump
    if not cached:
        with open(pickle_file, 'wb') as cache:
            pickle.dump(data_dump, cache)

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_analytic.Nsub',
            str(N_sub), '.ratio.png'
        ])
    else:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_analytic.Nsub',
            str(N_sub), '.png'
        ])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None