def __init__(self, path, data, command_line):

    Likelihood_prior.__init__(self, path, data, command_line)

    # Check if there are conflicting experiments
    for experiment in self.conflicting_experiments:
        if experiment in data.experiments:
            raise io_mp.LikelihoodError(
                "The current prior on M should only be used for the "
                "Pantheon supernovae, not JLA.")
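# Illustrative sketch (added here, not part of the shipped likelihood): a
# Likelihood_prior subclass like the one above typically implements loglkl()
# as a simple Gaussian prior on the supernova absolute magnitude M. The
# attribute names `M_mean` and `M_sigma` are hypothetical stand-ins for
# whatever the accompanying .data file actually defines.
def loglkl(self, cosmo, data):
    # nuisance parameters are stored rescaled; multiply back by 'scale'
    M = data.mcmc_parameters['M']['current'] * data.mcmc_parameters['M']['scale']
    return -0.5 * (M - self.M_mean)**2 / self.M_sigma**2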
def __init__(self, path, data, command_line):
    # I should already take care of using only GRF mocks or data here
    # (because of different folder-structures etc...)
    # or for now just write it for GRFs for tests and worry about it later...
    Likelihood.__init__(self, path, data, command_line)

    # Check if the data can be found
    try:
        fname = os.path.join(self.data_directory,
                             'Resetting_bias/parameters_B_mode_model.dat')
        parser_mp.existing_file(fname)
    except:
        raise io_mp.ConfigurationError(
            'KiDS-450 QE data not found. Download the data at '
            'http://kids.strw.leidenuniv.nl/sciencedata.php '
            'and specify path to data through the variable '
            'kids450_qe_likelihood_public.data_directory in '
            'the .data file. See README in likelihood folder '
            'for further instructions.')

    # TODO: this is also CFHTLenS legacy...
    # only relevant for GRFs!
    #dict_BWM = {'W1': 'G10_', 'W2': 'G126_', 'W3': 'G162_', 'W4': 'G84_'}

    self.need_cosmo_arguments(data, {'output': 'mPk'})

    self.redshift_bins = []
    for index_zbin in xrange(len(self.zbin_min)):
        redshift_bin = '{:.2f}z{:.2f}'.format(self.zbin_min[index_zbin],
                                              self.zbin_max[index_zbin])
        self.redshift_bins.append(redshift_bin)

    # number of z-bins
    self.nzbins = len(self.redshift_bins)
    # number of *unique* correlations between z-bins
    self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

    all_bands_EE_to_use = []
    all_bands_BB_to_use = []

    '''
    if self.fit_cross_correlations_only:
        # mask out auto-spectra:
        for index_zbin1 in xrange(self.nzbins):
            for index_zbin2 in xrange(index_zbin1 + 1):
                if index_zbin1 == index_zbin2:
                    all_bands_EE_to_use += np.zeros_like(self.bands_EE_to_use).tolist()
                    all_bands_BB_to_use += np.zeros_like(self.bands_BB_to_use).tolist()
                else:
                    all_bands_EE_to_use += self.bands_EE_to_use
                    all_bands_BB_to_use += self.bands_BB_to_use
    else:
        # default, use all correlations:
        for i in xrange(self.nzcorrs):
            all_bands_EE_to_use += self.bands_EE_to_use
            all_bands_BB_to_use += self.bands_BB_to_use
    '''

    # default, use all correlations:
    for i in xrange(self.nzcorrs):
        all_bands_EE_to_use += self.bands_EE_to_use
        all_bands_BB_to_use += self.bands_BB_to_use

    all_bands_to_use = np.concatenate((all_bands_EE_to_use, all_bands_BB_to_use))
    self.indices_for_bands_to_use = np.where(np.asarray(all_bands_to_use) == 1)[0]

    # this is also the number of points in the datavector
    ndata = len(self.indices_for_bands_to_use)

    # I should load all the data needed only once, i.e. HERE:
    # not so sure about the statement above, I have the feeling "init" is
    # called for every MCMC step... maybe that's why the memory is filling up
    # on other machines?! --> nope, that wasn't the reason...
    start_load = time.time()

    if self.correct_resetting_bias:
        fname = os.path.join(self.data_directory,
                             'Resetting_bias/parameters_B_mode_model.dat')
        A_B_modes, exp_B_modes, err_A_B_modes, err_exp_B_modes = np.loadtxt(
            fname, unpack=True)
        self.params_resetting_bias = np.array([A_B_modes, exp_B_modes])
        fname = os.path.join(self.data_directory,
                             'Resetting_bias/covariance_B_mode_model.dat')
        self.cov_resetting_bias = np.loadtxt(fname)

    # try to load fiducial m-corrections from file (currently these are
    # global values over the full field, hence no looping over fields is
    # required for that!)
    # TODO: make output dependent on field; not necessary for the current
    # KiDS approach though!
    try:
        fname = os.path.join(
            self.data_directory,
            '{:}zbins/m_correction_avg.txt'.format(self.nzbins))
        if self.nzbins == 1:
            self.m_corr_fiducial_per_zbin = np.asarray(
                [np.loadtxt(fname, usecols=[1])])
        else:
            self.m_corr_fiducial_per_zbin = np.loadtxt(fname, usecols=[1])
    except:
        self.m_corr_fiducial_per_zbin = np.zeros(self.nzbins)
        print('Could not load m-correction values from \n', fname)
        print('Setting them to zero instead.')

    try:
        fname = os.path.join(
            self.data_directory,
            '{:}zbins/sigma_int_n_eff_{:}zbins.dat'.format(self.nzbins,
                                                           self.nzbins))
        tbdata = np.loadtxt(fname)
        if self.nzbins == 1:
            # correct columns for file!
            sigma_e1 = np.asarray([tbdata[2]])
            sigma_e2 = np.asarray([tbdata[3]])
            n_eff = np.asarray([tbdata[4]])
        else:
            # correct columns for file!
            sigma_e1 = tbdata[:, 2]
            sigma_e2 = tbdata[:, 3]
            n_eff = tbdata[:, 4]
        self.sigma_e = np.sqrt((sigma_e1**2 + sigma_e2**2) / 2.)
        # convert from 1 / sq. arcmin to 1 / sterad
        self.n_eff = n_eff / np.deg2rad(1. / 60.)**2
    except:
        # these dummies will always set the noise power to 0!
        self.sigma_e = np.zeros(self.nzbins)
        self.n_eff = np.ones(self.nzbins)
        print('Could not load sigma_e and n_eff!')

    collect_bp_EE_in_zbins = []
    collect_bp_BB_in_zbins = []
    # collect BP per zbin and combine into one array
    for zbin1 in xrange(self.nzbins):
        for zbin2 in xrange(zbin1 + 1):  # self.nzbins):
            # zbin2 first in fname!
            fname_EE = os.path.join(
                self.data_directory,
                '{:}zbins/band_powers_EE_z{:}xz{:}.dat'.format(
                    self.nzbins, zbin1 + 1, zbin2 + 1))
            fname_BB = os.path.join(
                self.data_directory,
                '{:}zbins/band_powers_BB_z{:}xz{:}.dat'.format(
                    self.nzbins, zbin1 + 1, zbin2 + 1))
            extracted_band_powers_EE = np.loadtxt(fname_EE)
            extracted_band_powers_BB = np.loadtxt(fname_BB)
            collect_bp_EE_in_zbins.append(extracted_band_powers_EE)
            collect_bp_BB_in_zbins.append(extracted_band_powers_BB)

    self.band_powers = np.concatenate(
        (np.asarray(collect_bp_EE_in_zbins).flatten(),
         np.asarray(collect_bp_BB_in_zbins).flatten()))

    fname = os.path.join(
        self.data_directory,
        '{:}zbins/covariance_all_z_EE_BB.dat'.format(self.nzbins))
    self.covariance = np.loadtxt(fname)

    fname = os.path.join(
        self.data_directory,
        '{:}zbins/band_window_matrix_nell100.dat'.format(self.nzbins))
    self.band_window_matrix = np.loadtxt(fname)

    # ells_intp and also band_offset are consistent between different patches!
    fname = os.path.join(
        self.data_directory,
        '{:}zbins/multipole_nodes_for_band_window_functions_nell100.dat'.format(
            self.nzbins))
    self.ells_intp = np.loadtxt(fname)
    self.band_offset_EE = len(extracted_band_powers_EE)
    self.band_offset_BB = len(extracted_band_powers_BB)

    # Check if any of the n(z) needs to be shifted in loglkl by D_z{1...n}:
    self.shift_n_z_by_D_z = np.zeros(self.nzbins, 'bool')
    for zbin in xrange(self.nzbins):
        param_name = 'D_z{:}'.format(zbin + 1)
        if param_name in data.mcmc_parameters:
            self.shift_n_z_by_D_z[zbin] = True

    # Read fiducial dn_dz from window files:
    # TODO: the hardcoded z_min and z_max correspond to the lower and upper
    # endpoints of the shifted left-border histogram!
    z_samples = []
    hist_samples = []
    for zbin in xrange(self.nzbins):
        redshift_bin = self.redshift_bins[zbin]
        window_file_path = os.path.join(
            self.data_directory,
            '{:}/n_z_avg_{:}.hist'.format(self.photoz_method, redshift_bin))
        if os.path.exists(window_file_path):
            zptemp, hist_pz = np.loadtxt(window_file_path, usecols=[0, 1],
                                         unpack=True)
            shift_to_midpoint = np.diff(zptemp)[0] / 2.
            # the consistency check only makes sense against the z-values of
            # the first bin, so store those once:
            if zbin == 0:
                zpcheck = zptemp
            if np.sum((zptemp - zpcheck)**2) > 1e-6:
                raise io_mp.LikelihoodError(
                    'The redshift values for the window files at different '
                    'bins do not match.')
            print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                  window_file_path)
            # we add a zero as the first element because we want to integrate
            # down to z = 0!
            z_samples += [np.concatenate((np.zeros(1),
                                          zptemp + shift_to_midpoint))]
            hist_samples += [np.concatenate((np.zeros(1), hist_pz))]
        else:
            raise io_mp.LikelihoodError("File not found:\n %s"
                                        % window_file_path)

    z_samples = np.asarray(z_samples)
    hist_samples = np.asarray(hist_samples)

    # prevent undersampling of histograms!
    if self.nzmax < len(zptemp):
        print("You're trying to integrate at a lower resolution than "
              "supplied by the n(z) histograms. \n"
              "Increase nzmax! Aborting now...")
        exit()
    # if nzmax matches the histogram resolution, we integrate at histogram
    # resolution and need to account for the extra zero entry added above
    elif self.nzmax == len(zptemp):
        self.nzmax = z_samples.shape[1]
        # requires that z-spacing is always the same for all bins...
        self.redshifts = z_samples[0, :]
        print('Integrations performed at resolution of histogram!')
    # if we interpolate anyway at arbitrary resolution the extra 0 doesn't matter
    else:
        self.nzmax += 1
        self.redshifts = np.linspace(z_samples.min(), z_samples.max(),
                                     self.nzmax)
        print('Integration performed at set nzmax resolution!')

    self.pz = np.zeros((self.nzmax, self.nzbins))
    self.pz_norm = np.zeros(self.nzbins, 'float64')
    for zbin in xrange(self.nzbins):
        # we assume that the loaded histograms are given as left-border
        # histograms and that the z-spacing is the same for each histogram
        spline_pz = itp.splrep(z_samples[zbin, :], hist_samples[zbin, :])
        #z_mod = self.z_p
        mask_min = self.redshifts >= z_samples[zbin, :].min()
        mask_max = self.redshifts <= z_samples[zbin, :].max()
        mask = mask_min & mask_max
        # points outside the z-range of the histograms are set to 0!
        self.pz[mask, zbin] = itp.splev(self.redshifts[mask], spline_pz)
        # Normalize selection functions
        dz = self.redshifts[1:] - self.redshifts[:-1]
        self.pz_norm[zbin] = np.sum(
            0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)

    self.z_max = self.redshifts.max()

    # k_max is arbitrary at the moment, since the cosmology module is not
    # calculated yet... TODO
    if self.mode == 'halofit':
        self.need_cosmo_arguments(data, {
            'z_max_pk': self.z_max,
            'output': 'mPk',
            'non linear': self.mode,
            'P_k_max_h/Mpc': self.k_max_h_by_Mpc})
    else:
        self.need_cosmo_arguments(data, {
            'z_max_pk': self.z_max,
            'output': 'mPk',
            'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

    print('Time for loading all data files:', time.time() - start_load)

    fname = os.path.join(self.data_directory, 'number_datapoints.txt')
    np.savetxt(fname, [ndata],
               header='number of datapoints in masked datavector')

    return
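# Hedged sketch (added here, with synthetic inputs): the n(z) treatment above
# in one self-contained helper, i.e. left-border histogram -> midpoint spline
# -> trapezoidal normalization, with points outside the histogram range set
# to zero. The function name and the toy numbers are ours, not the
# likelihood's.
import numpy as np
from scipy import interpolate as itp

def normalized_nz(z_left_edges, counts, z_grid):
    # shift left borders to bin midpoints (assumes uniform z-spacing)
    shift_to_midpoint = np.diff(z_left_edges)[0] / 2.
    z_mid = z_left_edges + shift_to_midpoint
    spline_pz = itp.splrep(z_mid, counts)
    pz = np.zeros_like(z_grid)
    mask = (z_grid >= z_mid.min()) & (z_grid <= z_mid.max())
    pz[mask] = itp.splev(z_grid[mask], spline_pz)
    # trapezoidal norm, mirroring pz_norm above
    dz = z_grid[1:] - z_grid[:-1]
    return pz, np.sum(0.5 * (pz[1:] + pz[:-1]) * dz)

# toy usage:
# z_edges = np.arange(0., 3.5, 0.05)
# pz, norm = normalized_nz(z_edges, np.exp(-z_edges), np.linspace(0., 3.5, 120))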
def __init__(self, path, data, command_line):

    Likelihood.__init__(self, path, data, command_line)

    print("Initializing Lya likelihood")

    self.need_cosmo_arguments(data, {'output': 'mPk'})
    self.need_cosmo_arguments(data, {'P_k_max_h/Mpc': 1.5 * self.kmax})

    # number of grid points for the lcdm case (i.e. alpha=0, regardless of
    # beta and gamma values), not needed
    #lcdm_points = 33
    # number of non-astro params (i.e. alpha, beta, and gamma)
    self.params_numbers = 3

    alphas = np.zeros(self.grid_size, 'float64')
    betas = np.zeros(self.grid_size, 'float64')
    gammas = np.zeros(self.grid_size, 'float64')

    # Derived_lkl is a new type of derived parameter calculated in the
    # likelihood, and not known to class. This first initialisation avoids
    # problems in the case of an error in the first point of the MCMC
    data.derived_lkl = {'alpha': 0, 'beta': 0, 'gamma': 0, 'lya_neff': 0}

    self.bin_file_path = os.path.join(command_line.folder, self.bin_file_name)
    if not os.path.exists(self.bin_file_path):
        with open(self.bin_file_path, 'w') as bin_file:
            bin_file.write('#')
            for name in data.get_mcmc_parameters(['varying']):
                name = re.sub('[$*&]', '', name)
                bin_file.write(' %s\t' % name)
            for name in data.get_mcmc_parameters(['derived']):
                name = re.sub('[$*&]', '', name)
                bin_file.write(' %s\t' % name)
            for name in data.get_mcmc_parameters(['derived_lkl']):
                name = re.sub('[$*&]', '', name)
                bin_file.write(' %s\t' % name)
            bin_file.write('\n')

    if 'z_reio' not in data.get_mcmc_parameters(['derived']) or \
            'sigma8' not in data.get_mcmc_parameters(['derived']):
        raise io_mp.ConfigurationError(
            'Error: the Lya likelihood needs z_reio and sigma8 as derived '
            'parameters')

    file_path = os.path.join(self.data_directory, self.grid_file)
    if os.path.exists(file_path):
        with open(file_path, 'r') as grid_file:
            line = grid_file.readline()
            while line.find('#') != -1:
                line = grid_file.readline()
            while (line.find('\n') != -1 and len(line) == 3):
                line = grid_file.readline()
            for index in range(self.grid_size):
                alphas[index] = float(line.split()[0])
                betas[index] = float(line.split()[1])
                gammas[index] = float(line.split()[2])
                line = grid_file.readline()
    else:
        raise io_mp.ConfigurationError('Error: grid file is missing')

    # Real parameters
    X_real = np.zeros((self.grid_size, self.params_numbers), 'float64')
    for k in range(self.grid_size):
        X_real[k][0] = self.khalf(alphas[k], betas[k], gammas[k])  # Here we use k_1/2
        X_real[k][1] = betas[k]
        X_real[k][2] = gammas[k]

    # For the normalization
    self.a_min = min(X_real[:, 0])
    self.b_min = min(X_real[:, 1])
    self.g_min = min(X_real[:, 2])
    self.a_max = max(X_real[:, 0])
    self.b_max = max(X_real[:, 1])
    self.g_max = max(X_real[:, 2])

    # Redshift independent parameters - params order: z_reio, sigma_8, n_eff, f_UV
    self.zind_param_size = [3, 5, 5, 3]  # how many values we have for each param
    self.zind_param_min = np.array([7., 0.5, -2.6, 0.])
    self.zind_param_max = np.array([15., 1.5, -2.0, 1.])
    zind_param_ref = np.array([9., 0.829, -2.3074, 0.])
    self.zreio_range = self.zind_param_max[0] - self.zind_param_min[0]
    self.neff_range = self.zind_param_max[2] - self.zind_param_min[2]

    # Redshift dependent parameters - params order: mean_f, t0, slope
    zdep_params_size = [9, 3, 3]  # how many values we have for each param
    zdep_params_refpos = [4, 1, 2]  # where to store the P_F(ref) DATA

    # Mean flux values
    flux_ref_old = (np.array([0.669181, 0.617042, 0.564612, 0.512514,
                              0.461362, 0.411733, 0.364155, 0.253828,
                              0.146033, 0.0712724]))
    # Older, not used values
    #flux_min_meanf = (np.array([0.401509, 0.370225, 0.338767, 0.307509,
    #                            0.276817, 0.24704, 0.218493, 0.152297,
    #                            0.0876197, 0.0427634]))
    #flux_max_meanf = (np.array([0.936854, 0.863859, 0.790456, 0.71752,
    #                            0.645907, 0.576426, 0.509816, 0.355359,
    #                            0.204446, 0.0997813]))

    # Manage the data sets
    # FIRST (NOT USED) DATASET (19 wavenumbers) ***XQ-100***
    self.zeta_range_XQ = [3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2]
    # list of redshifts corresponding to the 19 wavenumbers (k)
    self.k_XQ = [0.003, 0.006, 0.009, 0.012, 0.015, 0.018, 0.021, 0.024,
                 0.027, 0.03, 0.033, 0.036, 0.039, 0.042, 0.045, 0.048,
                 0.051, 0.054, 0.057]

    # SECOND DATASET (7 wavenumbers) ***HIRES/MIKE***
    self.zeta_range_mh = [4.2, 4.6, 5.0, 5.4]
    # list of redshifts corresponding to the 7 wavenumbers (k)
    self.k_mh = [0.00501187, 0.00794328, 0.0125893, 0.0199526, 0.0316228,
                 0.0501187, 0.0794328]  # note that k is in s/km

    self.zeta_full_length = (len(self.zeta_range_XQ) + len(self.zeta_range_mh))
    self.kappa_full_length = (len(self.k_XQ) + len(self.k_mh))

    # Which snapshots we use (first 7 for the first dataset, last 4 for the
    # second one)
    self.redshift = [3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.2, 4.6, 5.0, 5.4]

    # T0 and slope values
    t0_ref_old = np.array([11251.5, 11293.6, 11229.0, 10944.6, 10421.8,
                           9934.49, 9227.31, 8270.68, 7890.68, 7959.4])
    slope_ref_old = np.array([1.53919, 1.52894, 1.51756, 1.50382, 1.48922,
                              1.47706, 1.46909, 1.48025, 1.50814, 1.52578])

    t0_values_old = np.zeros((10, zdep_params_size[1]), 'float64')
    t0_values_old[:, 0] = np.array([7522.4, 7512.0, 7428.1, 7193.32, 6815.25,
                                    6480.96, 6029.94, 5501.17, 5343.59,
                                    5423.34])
    t0_values_old[:, 1] = t0_ref_old[:]
    t0_values_old[:, 2] = np.array([14990.1, 15089.6, 15063.4, 14759.3,
                                    14136.3, 13526.2, 12581.2, 11164.9,
                                    10479.4, 10462.6])

    slope_values_old = np.zeros((10, zdep_params_size[2]), 'float64')
    slope_values_old[:, 0] = np.array([0.996715, 0.979594, 0.960804, 0.938975,
                                       0.915208, 0.89345, 0.877893, 0.8884,
                                       0.937664, 0.970259])
    slope_values_old[:, 1] = [1.32706, 1.31447, 1.30014, 1.28335, 1.26545,
                              1.24965, 1.2392, 1.25092, 1.28657, 1.30854]
    slope_values_old[:, 2] = slope_ref_old[:]

    self.t0_min = t0_values_old[:, 0] * 0.1
    self.t0_max = t0_values_old[:, 2] * 1.4
    self.slope_min = slope_values_old[:, 0] * 0.8
    self.slope_max = slope_values_old[:, 2] * 1.15

    # Import the two grids for Kriging
    file_path = os.path.join(self.data_directory, self.astro_spectra_file)
    if os.path.exists(file_path):
        try:
            pkl = open(file_path, 'rb')
            self.input_full_matrix_interpolated_ASTRO = pickle.load(pkl)
        except UnicodeDecodeError:
            pkl = open(file_path, 'rb')
            self.input_full_matrix_interpolated_ASTRO = pickle.load(
                pkl, encoding='latin1')
        pkl.close()
    else:
        raise io_mp.ConfigurationError('Error: astro spectra file is missing')

    file_path = os.path.join(self.data_directory, self.abg_spectra_file)
    if os.path.exists(file_path):
        try:
            pkl = open(file_path, 'rb')
            self.input_full_matrix_interpolated_ABG = pickle.load(pkl)
        except UnicodeDecodeError:
            pkl = open(file_path, 'rb')
            self.input_full_matrix_interpolated_ABG = pickle.load(
                pkl, encoding='latin1')
        pkl.close()
    else:
        raise io_mp.ConfigurationError('Error: abg spectra file is missing')

    ALL_zdep_params = len(flux_ref_old) + len(t0_ref_old) + len(slope_ref_old)
    grid_length_ABG = len(self.input_full_matrix_interpolated_ABG[0, 0, :])
    grid_length_ASTRO = len(self.input_full_matrix_interpolated_ASTRO[0, 0, :])
    astroparams_number_KRIG = len(self.zind_param_size) + ALL_zdep_params

    # Import the ABG GRID (alpha, beta, gamma)
    file_path = os.path.join(self.data_directory, self.abg_grid_file)
    if os.path.exists(file_path):
        self.X_ABG = np.zeros((grid_length_ABG, self.params_numbers),
                              'float64')
        for param_index in range(self.params_numbers):
            self.X_ABG[:, param_index] = np.genfromtxt(
                file_path, usecols=[param_index], skip_header=1)
    else:
        raise io_mp.ConfigurationError('Error: abg grid file is missing')

    # Import the ASTRO GRID (ordering of params: z_reio, sigma_8, n_eff,
    # f_UV, mean_f(z), t0(z), slope(z))
    file_path = os.path.join(self.data_directory, self.abg_astro_grid_file)
    if os.path.exists(file_path):
        self.X = np.zeros((grid_length_ASTRO, astroparams_number_KRIG),
                          'float64')
        for param_index in range(astroparams_number_KRIG):
            self.X[:, param_index] = np.genfromtxt(
                file_path, usecols=[param_index], skip_header=1)
    else:
        raise io_mp.ConfigurationError(
            'Error: abg+astro grid file is missing')

    # Prepare the interpolation in astro-param space
    self.redshift_list = np.array([3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.6,
                                   5.0, 5.4])
    # this corresponds to the combined dataset (MIKE/HIRES + XQ-100)
    self.F_prior_min = np.array([0.535345, 0.493634, 0.44921, 0.392273,
                                 0.338578, 0.28871, 0.218493, 0.146675,
                                 0.0676442, 0.0247793])
    self.F_prior_max = np.array([0.803017, 0.748495, 0.709659, 0.669613,
                                 0.628673, 0.587177, 0.545471, 0.439262,
                                 0.315261, 0.204999])

    # Load the data
    if not self.DATASET == "mike-hires":
        raise io_mp.LikelihoodError(
            'Error: for the time being, only the mike-hires dataset is '
            'available')

    file_path = os.path.join(self.data_directory, self.MIKE_spectra_file)
    if os.path.exists(file_path):
        try:
            pkl = open(file_path, 'rb')
            y_M_reshaped = pickle.load(pkl)
        except UnicodeDecodeError:
            pkl = open(file_path, 'rb')
            y_M_reshaped = pickle.load(pkl, encoding='latin1')
        pkl.close()
    else:
        raise io_mp.ConfigurationError('Error: MIKE spectra file is missing')

    file_path = os.path.join(self.data_directory, self.HIRES_spectra_file)
    if os.path.exists(file_path):
        try:
            pkl = open(file_path, 'rb')
            y_H_reshaped = pickle.load(pkl)
        except UnicodeDecodeError:
            pkl = open(file_path, 'rb')
            y_H_reshaped = pickle.load(pkl, encoding='latin1')
        pkl.close()
    else:
        raise io_mp.ConfigurationError('Error: HIRES spectra file is missing')

    file_path = os.path.join(self.data_directory, self.MIKE_cov_file)
    if os.path.exists(file_path):
        try:
            pkl = open(file_path, 'rb')
            cov_M_inverted = pickle.load(pkl)
        except UnicodeDecodeError:
            pkl = open(file_path, 'rb')
            cov_M_inverted = pickle.load(pkl, encoding='latin1')
        pkl.close()
    else:
        raise io_mp.ConfigurationError(
            'Error: MIKE covariance matrix file is missing')

    file_path = os.path.join(self.data_directory, self.HIRES_cov_file)
    if os.path.exists(file_path):
        try:
            pkl = open(file_path, 'rb')
            cov_H_inverted = pickle.load(pkl)
        except UnicodeDecodeError:
            pkl = open(file_path, 'rb')
            cov_H_inverted = pickle.load(pkl, encoding='latin1')
        pkl.close()
    else:
        raise io_mp.ConfigurationError(
            'Error: HIRES covariance matrix file is missing')

    file_path = os.path.join(self.data_directory, self.PF_noPRACE_file)
    if os.path.exists(file_path):
        try:
            pkl = open(file_path, 'rb')
            self.PF_noPRACE = pickle.load(pkl)
        except UnicodeDecodeError:
            pkl = open(file_path, 'rb')
            self.PF_noPRACE = pickle.load(pkl, encoding='latin1')
        pkl.close()
    else:
        raise io_mp.ConfigurationError('Error: PF_noPRACE file is missing')

    self.cov_MH_inverted = block_diag(cov_H_inverted, cov_M_inverted)
    self.y_MH_reshaped = np.concatenate((y_H_reshaped, y_M_reshaped))

    print("Initialization of Lya likelihood done")
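# Hedged sketch (added here): the repeated pickle-with-fallback pattern above
# can be captured in a single helper. The name `load_pickle` is ours, not the
# likelihood's; the latin1 fallback handles Python-2 era pickles under
# Python 3, as in the try/except blocks above.
import os
import pickle

def load_pickle(file_path):
    if not os.path.exists(file_path):
        raise IOError('File not found:\n %s' % file_path)
    with open(file_path, 'rb') as pkl:
        try:
            return pickle.load(pkl)
        except UnicodeDecodeError:
            # rewind and retry with the latin1 encoding (Python 3 only)
            pkl.seek(0)
            return pickle.load(pkl, encoding='latin1')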
def __init__(self, path, data, command_line):

    Likelihood.__init__(self, path, data, command_line)

    # Force the cosmological module to store Pk for redshifts up to
    # max(self.z) and for k up to k_max
    self.need_cosmo_arguments(data, {'output': 'mPk'})
    self.need_cosmo_arguments(data, {'z_max_pk': self.zmax})
    self.need_cosmo_arguments(data, {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

    # Compute non-linear power spectrum if requested
    if (self.use_halofit):
        self.need_cosmo_arguments(data, {'non linear': 'halofit'})

    # Define array of l values, and initialize them
    # It is a logspace
    # find nlmax in order to reach lmax with logarithmic steps dlnl
    self.nlmax = int(np.log(self.lmax) / self.dlnl) + 1
    # redefine slightly dlnl so that the last point is always exactly lmax
    self.dlnl = np.log(self.lmax) / (self.nlmax - 1)
    self.l = np.exp(self.dlnl * np.arange(self.nlmax))

    # Read dn_dz from window files
    self.z_p = np.zeros(self.nzmax)
    zptemp = np.zeros(self.nzmax)
    self.p = np.zeros((self.nzmax, self.nbin))
    for i in xrange(self.nbin):
        window_file_path = os.path.join(self.data_directory,
                                        self.window_file[i])
        if os.path.exists(window_file_path):
            zptemp = np.loadtxt(window_file_path, usecols=[0])
            if (i > 0 and np.sum((zptemp - self.z_p)**2) > 1e-6):
                raise io_mp.LikelihoodError(
                    "The redshift values for the window files "
                    "at different bins do not match")
            self.z_p = zptemp
            self.p[:, i] = np.loadtxt(window_file_path, usecols=[1])
        else:
            raise io_mp.LikelihoodError("File not found:\n %s"
                                        % window_file_path)

    # Read measurements of xi+ and xi-
    nt = (self.nbin) * (self.nbin + 1) // 2
    self.theta_bins = np.zeros(2 * self.ntheta)
    self.xi_obs = np.zeros(self.ntheta * nt * 2)
    xipm_file_path = os.path.join(self.data_directory, self.xipm_file)
    if os.path.exists(xipm_file_path):
        self.theta_bins = np.loadtxt(xipm_file_path)[:, 0]
        if (np.sum((self.theta_bins[:self.ntheta] -
                    self.theta_bins[self.ntheta:])**2) > 1e-6):
            raise io_mp.LikelihoodError(
                "The angular values at which xi+ and xi- "
                "are observed do not match")
        temp = np.loadtxt(xipm_file_path)[:, 1:]
    else:
        raise io_mp.LikelihoodError("File not found:\n %s" % xipm_file_path)

    k = 0
    for j in xrange(nt):
        for i in xrange(2 * self.ntheta):
            self.xi_obs[k] = temp[i, j]
            k = k + 1

    # Read covariance matrix
    ndim = (self.ntheta) * (self.nbin) * (self.nbin + 1)
    covmat = np.zeros((ndim, ndim))
    covmat_file_path = os.path.join(self.data_directory, self.covmat_file)
    if os.path.exists(covmat_file_path):
        covmat = np.loadtxt(covmat_file_path)
    else:
        raise io_mp.LikelihoodError("File not found:\n %s" % covmat_file_path)
    covmat = covmat / self.ah_factor

    # Read angular cut values (OPTIONAL)
    if (self.use_cut_theta):
        cut_values = np.zeros((self.nbin, 2))
        cutvalues_file_path = os.path.join(self.data_directory,
                                           self.cutvalues_file)
        if os.path.exists(cutvalues_file_path):
            cut_values = np.loadtxt(cutvalues_file_path)
        else:
            raise io_mp.LikelihoodError("File not found:\n %s"
                                        % cutvalues_file_path)

    # Normalize selection functions
    self.p_norm = np.zeros(self.nbin, 'float64')
    for Bin in xrange(self.nbin):
        self.p_norm[Bin] = np.sum(0.5 * (self.p[1:, Bin] + self.p[:-1, Bin])
                                  * (self.z_p[1:] - self.z_p[:-1]))

    # Compute theta mask
    if (self.use_cut_theta):
        mask = np.zeros(2 * nt * self.ntheta)
        iz = 0
        for izl in xrange(self.nbin):
            for izh in xrange(izl, self.nbin):
                # this counts the bin combinations
                # iz=1 => (1,1), iz=2 => (1,2) etc
                iz = iz + 1
                for i in xrange(self.ntheta):
                    j = (iz - 1) * 2 * self.ntheta
                    xi_plus_cut = max(cut_values[izl, 0], cut_values[izh, 0])
                    xi_minus_cut = max(cut_values[izl, 1], cut_values[izh, 1])
                    if (self.theta_bins[i] > xi_plus_cut):
                        mask[j + i] = 1
                    if (self.theta_bins[i] > xi_minus_cut):
                        mask[self.ntheta + j + i] = 1
    else:
        mask = np.ones(2 * nt * self.ntheta)

    self.num_mask = int(np.sum(mask))
    self.mask_indices = np.zeros(self.num_mask)
    j = 0
    for i in xrange(self.ntheta * nt * 2):
        if (mask[i] == 1):
            self.mask_indices[j] = i
            j = j + 1
    self.mask_indices = np.int32(self.mask_indices)

    # Precompute masked inverse
    self.wl_invcov = np.zeros((self.num_mask, self.num_mask))
    self.wl_invcov = covmat[self.mask_indices][:, self.mask_indices]
    self.wl_invcov = np.linalg.inv(self.wl_invcov)

    # Fill array of discrete z values
    # self.z = np.linspace(0, self.zmax, num=self.nzmax)

    ################
    # Noise spectrum
    ################

    # Number of galaxies per steradian
    self.noise = 3600. * self.gal_per_sqarcmn * (180. / math.pi)**2
    # Number of galaxies per steradian per bin
    self.noise = self.noise / self.nbin
    # Noise spectrum (diagonal in bin*bin space, independent of l and Bin)
    self.noise = self.rms_shear**2 / self.noise

    ################################################
    # discrete theta values (to convert C_l to xi's)
    ################################################

    thetamin = np.min(self.theta_bins) * 0.8
    thetamax = np.max(self.theta_bins) * 1.2

    self.nthetatot = np.ceil(math.log(thetamax / thetamin) / self.dlntheta) + 1
    self.nthetatot = np.int32(self.nthetatot)
    self.theta = np.zeros(self.nthetatot, 'float64')
    self.a2r = math.pi / (180. * 60.)

    # define an array of theta's
    for it in xrange(self.nthetatot):
        self.theta[it] = thetamin * math.exp(self.dlntheta * it)

    ################################################################
    # discrete l values used in the integral to convert C_l to xi's
    ################################################################

    # l = x / theta / self.a2r
    # x = l * theta * self.a2r
    #
    # We start by considering the largest theta, theta[-1], and for that
    # value we infer a list of l's from the requirement that corresponding
    # x values are spaced linearly with a given stepsize, until xmax. Then
    # we loop over smaller theta values, in decreasing order, and for each
    # of them we complete the previous list of l's, always requiring the
    # same dx stepsize (so that dl does vary) up to xmax.
    #
    # We first apply this to a running value ll, in order to count the total
    # number of ll's, called nl. Then we create the array lll[nl] and fill it
    # with the same values.
    #
    # We also compute on the fly the critical index il_max[it] such that
    # ll[il_max[it]]*self.theta[it]*self.a2r is the first value of x above xmax

    ll = 1.
    il = 0
    while (ll * self.theta[-1] * self.a2r < self.dx_threshold):
        ll += self.dx_below_threshold / self.theta[-1] / self.a2r
        il += 1
    for it in xrange(self.nthetatot):
        while (ll * self.theta[self.nthetatot - 1 - it] * self.a2r
               < self.xmax) and \
              (ll + self.dx_above_threshold
               / self.theta[self.nthetatot - 1 - it] / self.a2r < self.lmax):
            ll += self.dx_above_threshold \
                / self.theta[self.nthetatot - 1 - it] / self.a2r
            il += 1
    self.nl = il + 1

    self.lll = np.zeros(self.nl, 'float64')
    self.il_max = np.zeros(self.nthetatot, 'int')
    il = 0
    self.lll[il] = 1.
    while (self.lll[il] * self.theta[-1] * self.a2r < self.dx_threshold):
        il += 1
        self.lll[il] = self.lll[il - 1] \
            + self.dx_below_threshold / self.theta[-1] / self.a2r
    for it in xrange(self.nthetatot):
        while (self.lll[il] * self.theta[self.nthetatot - 1 - it] * self.a2r
               < self.xmax) and \
              (self.lll[il] + self.dx_above_threshold
               / self.theta[self.nthetatot - 1 - it] / self.a2r < self.lmax):
            il += 1
            self.lll[il] = self.lll[il - 1] + self.dx_above_threshold \
                / self.theta[self.nthetatot - 1 - it] / self.a2r
        self.il_max[self.nthetatot - 1 - it] = il

    # finally we compute the array l*dl that will be used in the trapezoidal
    # integration (l is a factor in the integrand [l * C_l * Bessel], and dl
    # is like a weight)
    self.ldl = np.zeros(self.nl, 'float64')
    self.ldl[0] = self.lll[0] * 0.5 * (self.lll[1] - self.lll[0])
    for il in xrange(1, self.nl - 1):
        self.ldl[il] = self.lll[il] * 0.5 * (self.lll[il + 1] - self.lll[il - 1])
    self.ldl[-1] = self.lll[-1] * 0.5 * (self.lll[-1] - self.lll[-2])

    #####################################################################
    # Allocation of various arrays filled and used in the function loglkl
    #####################################################################

    self.r = np.zeros(self.nzmax, 'float64')
    self.dzdr = np.zeros(self.nzmax, 'float64')
    self.g = np.zeros((self.nzmax, self.nbin), 'float64')
    self.pk = np.zeros((self.nlmax, self.nzmax), 'float64')
    self.k_sigma = np.zeros(self.nzmax, 'float64')
    self.alpha = np.zeros((self.nlmax, self.nzmax), 'float64')
    if 'epsilon' in self.use_nuisance:
        self.E_th_nu = np.zeros((self.nlmax, self.nzmax), 'float64')
    self.nbin_pairs = self.nbin * (self.nbin + 1) // 2
    self.Cl_integrand = np.zeros((self.nzmax, self.nbin_pairs), 'float64')
    self.Cl = np.zeros((self.nlmax, self.nbin_pairs), 'float64')
    if self.theoretical_error != 0:
        self.El_integrand = np.zeros((self.nzmax, self.nbin_pairs), 'float64')
        self.El = np.zeros((self.nlmax, self.nbin_pairs), 'float64')
    self.spline_Cl = np.empty(self.nbin_pairs, dtype=(list, 3))
    self.xi1 = np.zeros((self.nthetatot, self.nbin_pairs), 'float64')
    self.xi2 = np.zeros((self.nthetatot, self.nbin_pairs), 'float64')
    self.Cll = np.zeros((self.nbin_pairs, self.nl), 'float64')
    self.BBessel0 = np.zeros(self.nl, 'float64')
    self.BBessel4 = np.zeros(self.nl, 'float64')
    self.xi1_theta = np.empty(self.nbin_pairs, dtype=(list, 3))
    self.xi2_theta = np.empty(self.nbin_pairs, dtype=(list, 3))
    self.xi = np.zeros(np.size(self.xi_obs), 'float64')

    return
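# Quick self-check sketch (added here, with toy numbers): the l*dl weights
# built above implement a trapezoidal rule, so for any integrand f(l) one has
# sum(ldl * f(lll) / lll) ~ integral of f(l) dl. Testing with f(l) = l, where
# the trapezoidal rule is exact:
import numpy as np

lll = np.linspace(1., 100., 500)  # stand-in for the adaptive l-grid above
ldl = np.empty_like(lll)
ldl[0] = lll[0] * 0.5 * (lll[1] - lll[0])
ldl[1:-1] = lll[1:-1] * 0.5 * (lll[2:] - lll[:-2])
ldl[-1] = lll[-1] * 0.5 * (lll[-1] - lll[-2])
# integral of l dl from 1 to 100 is (100**2 - 1)/2 = 4999.5
print(np.sum(ldl))  # ~4999.5 up to floating-point error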
def __init__(self, path, data, command_line):

    Likelihood.__init__(self, path, data, command_line)

    # Check if the data can be found, although we don't actually use that
    # particular file but take it as a placeholder for the folder
    try:
        fname = os.path.join(
            self.data_directory,
            'DATA_VECTOR/KiDS-450_xi_pm_tomographic_data_vector.dat')
        parser_mp.existing_file(fname)
    except:
        raise io_mp.ConfigurationError(
            'KiDS-450 CF data not found. Download the data at '
            'http://kids.strw.leidenuniv.nl/sciencedata.php '
            'and specify path to data through the variable '
            'kids450_cf_2cosmos_likelihood_public.data_directory in '
            'the .data file. See README in likelihood folder '
            'for further instructions.')

    # for loading of Nz-files:
    self.z_bins_min = [0.1, 0.3, 0.5, 0.7]
    self.z_bins_max = [0.3, 0.5, 0.7, 0.9]

    # number of angular bins in which xipm is measured
    # we always load the full data vector with 9 data points for xi_p and
    # xi_m each; they are cut to the fiducial scales (or any arbitrarily
    # defined scales with the 'cut_values.dat' files)!
    self.ntheta = 9

    # Force the cosmological module to store Pk for redshifts up to
    # max(self.z) and for k up to k_max
    self.need_cosmo1_arguments(data, {'output': 'mPk'})
    self.need_cosmo1_arguments(data, {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})
    self.need_cosmo2_arguments(data, {'output': 'mPk'})
    self.need_cosmo2_arguments(data, {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

    # Compute non-linear power spectrum if requested:
    if self.method_non_linear_Pk in ['halofit', 'HALOFIT', 'Halofit',
                                     'hmcode', 'Hmcode', 'HMcode', 'HMCODE']:
        self.need_cosmo1_arguments(data,
                                   {'non linear': self.method_non_linear_Pk})
        self.need_cosmo2_arguments(data,
                                   {'non linear': self.method_non_linear_Pk})
        print('Using {:} to obtain the non-linear P(k, z)!'.format(
            self.method_non_linear_Pk))
    else:
        print('Only using the linear P(k, z) for ALL calculations \n'
              '(check keywords for "method_non_linear_Pk").')

    self.nzbins = len(self.z_bins_min)
    self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

    # Create labels for loading of dn/dz-files:
    self.zbin_labels = []
    for i in xrange(self.nzbins):
        self.zbin_labels += ['{:.1f}t{:.1f}'.format(self.z_bins_min[i],
                                                    self.z_bins_max[i])]

    # Define array of l values, and initialize them
    # It is a logspace
    # find nlmax in order to reach lmax with logarithmic steps dlnl
    self.nlmax = int(np.log(self.lmax) / self.dlnl) + 1
    # redefine slightly dlnl so that the last point is always exactly lmax
    self.dlnl = np.log(self.lmax) / (self.nlmax - 1)
    self.l = np.exp(self.dlnl * np.arange(self.nlmax))

    # TODO: not really needed when bootstrap-errors are selected...
    # Read fiducial dn_dz from window files:
    # TODO: zmin and zmax are hardcoded to the fiducial lower and upper
    # limit of the midpoint histogram!
    self.z_p = np.linspace(0.025, 3.475, self.nzmax)
    self.pz = np.zeros((self.nzmax, self.nzbins))
    self.pz_norm = np.zeros(self.nzbins, 'float64')
    for zbin in xrange(self.nzbins):
        window_file_path = os.path.join(
            self.data_directory,
            'Nz_{0:}/Nz_{0:}_Mean/Nz_{0:}_z{1:}.asc'.format(
                self.nz_method, self.zbin_labels[zbin]))
        if os.path.exists(window_file_path):
            zptemp, hist_pz = np.loadtxt(window_file_path, usecols=[0, 1],
                                         unpack=True)
            # the consistency check only makes sense against the z-values of
            # the first bin, so store those once:
            if zbin == 0:
                zpcheck = zptemp
            if np.sum((zptemp - zpcheck)**2) > 1e-6:
                raise io_mp.LikelihoodError(
                    'The redshift values for the window files at different '
                    'bins do not match.')
            print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                  window_file_path)
            # we assume that the histograms loaded are given as left-border
            # histograms and that the z-spacing is the same for each histogram
            shift_to_midpoint = np.diff(zptemp)[0] / 2.
            spline_pz = itp.splrep(zptemp + shift_to_midpoint, hist_pz)
            z_mod = self.z_p  #+ self.shift_by_dz[zbin]
            mask_min = z_mod >= zptemp.min()
            mask_max = z_mod <= zptemp.max()
            mask = mask_min & mask_max
            self.pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz)
            # Normalize selection functions
            dz = self.z_p[1:] - self.z_p[:-1]
            self.pz_norm[zbin] = np.sum(
                0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)
        else:
            raise io_mp.LikelihoodError("File not found:\n %s"
                                        % window_file_path)

    self.zmax = self.z_p.max()
    self.need_cosmo1_arguments(data, {'z_max_pk': self.zmax})
    self.need_cosmo2_arguments(data, {'z_max_pk': self.zmax})

    # read in public data vector:
    temp = self.__load_public_data_vector()
    self.theta_bins = temp[:, 0]
    if (np.sum((self.theta_bins[:self.ntheta] -
                self.theta_bins[self.ntheta:])**2) > 1e-6):
        raise io_mp.LikelihoodError(
            'The angular values at which xi+ and xi- '
            'are observed do not match')

    # create the data-vector in the following format (due to covariance
    # structure):
    # xi_obs = {xi1(theta1, z_11)...xi1(theta_k, z_11), xi2(theta_1, z_11)...
    #           xi2(theta_k, z_11);...; xi1(theta1, z_nn)...xi1(theta_k, z_nn),
    #           xi2(theta_1, z_nn)... xi2(theta_k, z_nn)}
    xi_obs = self.__get_xi_obs(temp[:, 1:])

    # concatenate xi_obs with itself to create the ueberdata-vector:
    self.xi_obs_1 = xi_obs
    self.xi_obs_2 = xi_obs
    xi_obs_combined = np.concatenate((xi_obs, xi_obs))

    # now load the full covariance matrix:
    covmat_block = self.__load_public_cov_mat()

    # build a combined cov-mat; for that to work we assume that the cov-mat
    # dimension fits the size of the *uncut*, single data-vector and is
    # ordered in the same way as the *final* data-vector created here
    # (i.e. vec = [xi+(1,1), xi-(1,1), xi+(1,2), xi-(1,2), ...])!
    covmat = np.asarray(
        np.bmat('covmat_block, covmat_block; covmat_block, covmat_block'))

    # Read angular cut values (OPTIONAL)
    # 1 --> fiducial scales
    # 2 --> large scales
    if self.use_cut_theta:
        cut_values1 = np.zeros((self.nzbins, 2))
        cut_values2 = np.zeros((self.nzbins, 2))
        cutvalues_file_path1 = os.path.join(
            self.data_directory, 'CUT_VALUES/' + self.cutvalues_file1)
        if os.path.exists(cutvalues_file_path1):
            cut_values1 = np.loadtxt(cutvalues_file_path1)
        else:
            raise io_mp.LikelihoodError(
                'File not found:\n {:} \n Check that requested file was '
                'copied to:\n {:}'.format(cutvalues_file_path1,
                                          self.data_directory + 'CUT_VALUES/'))
        cutvalues_file_path2 = os.path.join(
            self.data_directory, 'CUT_VALUES/' + self.cutvalues_file2)
        if os.path.exists(cutvalues_file_path2):
            cut_values2 = np.loadtxt(cutvalues_file_path2)
        else:
            raise io_mp.LikelihoodError(
                'File not found:\n {:} \n Check that requested file was '
                'copied to:\n {:}'.format(cutvalues_file_path2,
                                          self.data_directory + 'CUT_VALUES/'))

    # Compute theta mask
    if self.use_cut_theta:
        mask1 = self.__get_mask(cut_values1)
        mask2 = self.__get_mask(cut_values2)
    else:
        mask1 = np.ones(2 * self.nzcorrs * self.ntheta)
        mask2 = np.ones(2 * self.nzcorrs * self.ntheta)

    #print(mask1, len(np.where(mask1 == 1)[0]))
    #print(mask2, len(np.where(mask2 == 1)[0]))

    # for tomographic splits, e.g.:
    # mask1 = fiducial
    # mask2 = z-bin 3 only (gives also all cross_powers)
    # --> mask1 = mask1 - mask2 --> all remaining bin combinations
    if self.subtract_mask2_from_mask1:
        mask1 = mask1 - mask2
        #print(mask1, len(np.where(mask1 == 1)[0]))
        #print(mask2, len(np.where(mask2 == 1)[0]))

    self.mask_indices1 = np.where(mask1 == 1)[0]
    self.mask_indices2 = np.where(mask2 == 1)[0]

    # combine "fiducial" mask and "large scales" mask:
    # this is wrong, because indices in the second half are only wrt. the
    # first half!!!
    #self.mask_indices = np.concatenate((self.mask_indices1, self.mask_indices2))
    # combine "fiducial" mask and "large scales" mask:
    mask = np.concatenate((mask1, mask2))
    self.mask_indices = np.where(mask == 1)[0]

    # apply equation 12 from Hildebrandt et al. 2017 to covmat:
    # this assumes that the m-correction was already applied to the
    # data vector!
    if self.marginalize_over_multiplicative_bias_uncertainty:
        cov_m_corr = np.matrix(xi_obs_combined[self.mask_indices]).T * \
            np.matrix(xi_obs_combined[self.mask_indices]) * \
            4. * self.err_multiplicative_bias**2
        #covmat = covmat[self.mask_indices][:, self.mask_indices] + np.asarray(cov_m_corr)
        covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)] \
            + np.asarray(cov_m_corr)
    else:
        #covmat = covmat[self.mask_indices][:, self.mask_indices]
        covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]

    fname = self.data_directory + 'cov_matrix_ana_comb_cut.dat'
    np.savetxt(fname, covmat)
    print('Saved trimmed covariance to: \n', fname)

    # precompute Cholesky transform for chi^2 calculation:
    self.cholesky_transform = cholesky(covmat, lower=True)

    # Fill array of discrete z values
    # self.z = np.linspace(0, self.zmax, num=self.nzmax)

    '''
    ################
    # Noise spectrum
    ################
    # only useful for theoretical signal

    # Number of galaxies per steradian
    self.noise = 3600.*self.gal_per_sqarcmn*(180./math.pi)**2

    # Number of galaxies per steradian per bin
    self.noise = self.noise/self.nzbins

    # Noise spectrum (diagonal in bin*bin space, independent of l and Bin)
    self.noise = self.rms_shear**2/self.noise
    '''

    ################################################
    # discrete theta values (to convert C_l to xi's)
    ################################################

    thetamin = np.min(self.theta_bins) * 0.8
    thetamax = np.max(self.theta_bins) * 1.2

    self.nthetatot = np.ceil(math.log(thetamax / thetamin) / self.dlntheta) + 1
    self.nthetatot = np.int32(self.nthetatot)
    self.theta = np.zeros(self.nthetatot, 'float64')
    self.a2r = math.pi / (180. * 60.)

    # define an array of theta's
    for it in xrange(self.nthetatot):
        self.theta[it] = thetamin * math.exp(self.dlntheta * it)

    ################################################################
    # discrete l values used in the integral to convert C_l to xi's
    ################################################################

    # l = x / theta / self.a2r
    # x = l * theta * self.a2r
    #
    # We start by considering the largest theta, theta[-1], and for that
    # value we infer a list of l's from the requirement that corresponding
    # x values are spaced linearly with a given stepsize, until xmax. Then
    # we loop over smaller theta values, in decreasing order, and for each
    # of them we complete the previous list of l's, always requiring the
    # same dx stepsize (so that dl does vary) up to xmax.
    #
    # We first apply this to a running value ll, in order to count the total
    # number of ll's, called nl. Then we create the array lll[nl] and fill it
    # with the same values.
    #
    # We also compute on the fly the critical index il_max[it] such that
    # ll[il_max[it]]*self.theta[it]*self.a2r is the first value of x above xmax

    ll = 1.
    il = 0
    while (ll * self.theta[-1] * self.a2r < self.dx_threshold):
        ll += self.dx_below_threshold / self.theta[-1] / self.a2r
        il += 1
    for it in xrange(self.nthetatot):
        while (ll * self.theta[self.nthetatot - 1 - it] * self.a2r
               < self.xmax) and \
              (ll + self.dx_above_threshold
               / self.theta[self.nthetatot - 1 - it] / self.a2r < self.lmax):
            ll += self.dx_above_threshold \
                / self.theta[self.nthetatot - 1 - it] / self.a2r
            il += 1
    self.nl = il + 1

    self.lll = np.zeros(self.nl, 'float64')
    self.il_max = np.zeros(self.nthetatot, 'int')
    il = 0
    self.lll[il] = 1.
    while (self.lll[il] * self.theta[-1] * self.a2r < self.dx_threshold):
        il += 1
        self.lll[il] = self.lll[il - 1] \
            + self.dx_below_threshold / self.theta[-1] / self.a2r
    for it in xrange(self.nthetatot):
        while (self.lll[il] * self.theta[self.nthetatot - 1 - it] * self.a2r
               < self.xmax) and \
              (self.lll[il] + self.dx_above_threshold
               / self.theta[self.nthetatot - 1 - it] / self.a2r < self.lmax):
            il += 1
            self.lll[il] = self.lll[il - 1] + self.dx_above_threshold \
                / self.theta[self.nthetatot - 1 - it] / self.a2r
        self.il_max[self.nthetatot - 1 - it] = il

    # finally we compute the array l*dl that will be used in the trapezoidal
    # integration (l is a factor in the integrand [l * C_l * Bessel], and dl
    # is like a weight)
    self.ldl = np.zeros(self.nl, 'float64')
    self.ldl[0] = self.lll[0] * 0.5 * (self.lll[1] - self.lll[0])
    for il in xrange(1, self.nl - 1):
        self.ldl[il] = self.lll[il] * 0.5 * (self.lll[il + 1] - self.lll[il - 1])
    self.ldl[-1] = self.lll[-1] * 0.5 * (self.lll[-1] - self.lll[-2])

    return
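# Hedged sketch (added here, toy inputs): the m-correction marginalization
# used above, i.e. equation (12) of Hildebrandt et al. 2017, inflates the
# covariance by a rank-one term 4 sigma_m^2 * xi xi^T built from the masked
# data vector. np.outer replaces the deprecated np.matrix product; the
# function name is ours.
import numpy as np

def add_m_correction(covmat, xi_masked, err_multiplicative_bias):
    # covmat: (n, n) masked covariance; xi_masked: (n,) masked data vector
    cov_m_corr = 4. * err_multiplicative_bias**2 * np.outer(xi_masked, xi_masked)
    return covmat + cov_m_corr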
def __init__(self, path, data, command_line):

    Likelihood.__init__(self, path, data, command_line)

    # Check if the data can be found, although we don't actually use that
    # particular file but take it as a placeholder for the folder
    try:
        fname = os.path.join(
            self.data_directory,
            'DATA_VECTOR/KiDS-450_xi_pm_tomographic_data_vector.dat')
        parser_mp.existing_file(fname)
    except:
        raise io_mp.ConfigurationError(
            'KiDS-450 CF data not found. Download the data at '
            'http://kids.strw.leidenuniv.nl/sciencedata.php '
            'and specify path to data through the variable '
            'kids450_cf_likelihood_public.data_directory in '
            'the .data file. See README in likelihood folder '
            'for further instructions.')

    # for loading of Nz-files:
    self.z_bins_min = [0.1, 0.3, 0.5, 0.7]
    self.z_bins_max = [0.3, 0.5, 0.7, 0.9]

    # number of angular bins in which xipm is measured
    # we always load the full data vector with 9 data points for xi_p and
    # xi_m each; they are cut to the fiducial scales (or any arbitrarily
    # defined scales with the 'cut_values.dat' files)!
    self.ntheta = 9

    # Force the cosmological module to store Pk for redshifts up to
    # max(self.z) and for k up to k_max
    self.need_cosmo_arguments(data, {'output': 'mPk'})
    self.need_cosmo_arguments(data, {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

    # Compute non-linear power spectrum if requested:
    if self.method_non_linear_Pk in ['halofit', 'HALOFIT', 'Halofit',
                                     'hmcode', 'Hmcode', 'HMcode', 'HMCODE']:
        self.need_cosmo_arguments(data,
                                  {'non linear': self.method_non_linear_Pk})
        print('Using {:} to obtain the non-linear P(k, z)!'.format(
            self.method_non_linear_Pk))
    else:
        print('Only using the linear P(k, z) for ALL calculations \n'
              '(check keywords for "method_non_linear_Pk").')

    # Define array of l values, and initialize them
    # It is a logspace
    # find nlmax in order to reach lmax with logarithmic steps dlnl
    self.nlmax = int(np.log(self.lmax) / self.dlnl) + 1
    # redefine slightly dlnl so that the last point is always exactly lmax
    self.dlnl = np.log(self.lmax) / (self.nlmax - 1)
    self.l = np.exp(self.dlnl * np.arange(self.nlmax))

    self.nzbins = len(self.z_bins_min)
    self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

    # Create labels for loading of dn/dz-files:
    self.zbin_labels = []
    for i in xrange(self.nzbins):
        self.zbin_labels += ['{:.1f}t{:.1f}'.format(self.z_bins_min[i],
                                                    self.z_bins_max[i])]

    # read in public data vector:
    temp = self.__load_public_data_vector()
    self.theta_bins = temp[:, 0]
    if (np.sum((self.theta_bins[:self.ntheta] -
                self.theta_bins[self.ntheta:])**2) > 1e-6):
        raise io_mp.LikelihoodError(
            'The angular values at which xi+ and xi- '
            'are observed do not match')

    # create the data-vector in the following format (due to covariance
    # structure):
    # xi_obs = {xi1(theta1, z_11)...xi1(theta_k, z_11), xi2(theta_1, z_11)...
    #           xi2(theta_k, z_11);...; xi1(theta1, z_nn)...xi1(theta_k, z_nn),
    #           xi2(theta_1, z_nn)... xi2(theta_k, z_nn)}
    self.xi_obs = self.__get_xi_obs(temp[:, 1:])

    # now load the full covariance matrix:
    covmat = self.__load_public_cov_mat()

    # Read angular cut values (OPTIONAL)
    if self.use_cut_theta:
        cut_values1 = np.zeros((self.nzbins, 2))
        cut_values2 = np.zeros((self.nzbins, 2))
        cutvalues_file_path1 = os.path.join(
            self.data_directory, 'CUT_VALUES/' + self.cutvalues_file1)
        if os.path.exists(cutvalues_file_path1):
            cut_values1 = np.loadtxt(cutvalues_file_path1)
        else:
            raise io_mp.LikelihoodError(
                'File not found:\n {:} \n Check that requested file was '
                'copied to:\n {:}'.format(cutvalues_file_path1,
                                          self.data_directory + 'CUT_VALUES/'))
        if self.subtract_mask2_from_mask1:
            cutvalues_file_path2 = os.path.join(
                self.data_directory, 'CUT_VALUES/' + self.cutvalues_file2)
            if os.path.exists(cutvalues_file_path2):
                cut_values2 = np.loadtxt(cutvalues_file_path2)
            else:
                raise io_mp.LikelihoodError(
                    'File not found:\n {:} \n Check that requested file was '
                    'copied to:\n {:}'.format(cutvalues_file_path2,
                                              self.data_directory
                                              + 'CUT_VALUES/'))

    # Compute theta mask
    if self.use_cut_theta:
        mask1 = self.__get_mask(cut_values1)
        if self.subtract_mask2_from_mask1:
            mask2 = self.__get_mask(cut_values2)
            mask = mask1 - mask2
        else:
            mask = mask1
    else:
        mask = np.ones(2 * self.nzcorrs * self.ntheta)

    self.mask_indices = np.where(mask == 1)[0]

    fname = os.path.join(self.data_directory, 'kids450_xipm_4bin_cut.dat')
    np.savetxt(fname, self.xi_obs[self.mask_indices])

    # propagate uncertainty of m-correction following equation (12) in
    # Hildebrandt et al. 2017 (arXiv:1606.05338) with \sigma_m = 0.01
    # NOTE: following Troxel et al. 2018 (arXiv:1804.10663) it is NOT
    # correct to use the noisy data vector for this; instead one should use
    # a theory vector (e.g. derived for the same cosmology for which the
    # analytical covariance was calculated).
    fname = os.path.join(self.data_directory, 'cov_matrix_ana_cut.dat')
    if self.marginalize_over_multiplicative_bias_uncertainty:
        cov_m_corr = np.matrix(self.xi_obs[self.mask_indices]).T * \
            np.matrix(self.xi_obs[self.mask_indices]) * \
            4. * self.err_multiplicative_bias**2
        covmat = covmat[self.mask_indices][:, self.mask_indices] \
            + np.asarray(cov_m_corr)
        #covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]
        np.savetxt(fname, covmat)
        #covmat = covmat + np.asarray(cov_m_corr)
    else:
        #covmat = covmat[self.mask_indices][:, self.mask_indices]
        covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]
        np.savetxt(fname, covmat)

    # precompute Cholesky transform for chi^2 calculation:
    self.cholesky_transform = cholesky(covmat, lower=True)

    # Read fiducial dn_dz from window files:
    #self.z_p = np.zeros(self.nzmax)
    # TODO: the hardcoded z_min and z_max correspond to the lower and upper
    # endpoints of the shifted left-border histogram!
    self.z_p = np.linspace(0.025, 3.475, self.nzmax)
    self.pz = np.zeros((self.nzmax, self.nzbins))
    self.pz_norm = np.zeros(self.nzbins, 'float64')
    for zbin in xrange(self.nzbins):
        window_file_path = os.path.join(
            self.data_directory,
            'Nz_{0:}/Nz_{0:}_Mean/Nz_{0:}_z{1:}.asc'.format(
                self.nz_method, self.zbin_labels[zbin]))
        zptemp, hist_pz = np.loadtxt(window_file_path, usecols=[0, 1],
                                     unpack=True)
        # the consistency check only makes sense against the z-values of
        # the first bin, so store those once:
        if zbin == 0:
            zpcheck = zptemp
        if np.sum((zptemp - zpcheck)**2) > 1e-6:
            raise io_mp.LikelihoodError(
                'The redshift values for the window files at different '
                'bins do not match.')
        print('Loaded n(zbin{:}) from: \n'.format(zbin + 1), window_file_path)
        # we assume that the histograms loaded are given as left-border
        # histograms and that the z-spacing is the same for each histogram
        shift_to_midpoint = np.diff(zptemp)[0] / 2.
        spline_pz = itp.splrep(zptemp + shift_to_midpoint, hist_pz)
        z_mod = self.z_p  #+ shift_by_dz[zbin]
        mask_min = z_mod >= zptemp.min()
        mask_max = z_mod <= zptemp.max()
        mask = mask_min & mask_max
        # points outside the z-range of the histograms are set to 0!
        self.pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz)
        # Normalize selection functions
        dz = self.z_p[1:] - self.z_p[:-1]
        self.pz_norm[zbin] = np.sum(
            0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)

    self.zmax = self.z_p.max()
    self.need_cosmo_arguments(data, {'z_max_pk': self.zmax})

    ################################################
    # discrete theta values (to convert C_l to xi's)
    ################################################

    thetamin = np.min(self.theta_bins) * 0.8
    thetamax = np.max(self.theta_bins) * 1.2

    self.nthetatot = np.ceil(math.log(thetamax / thetamin) / self.dlntheta) + 1
    self.nthetatot = np.int32(self.nthetatot)
    self.theta = np.zeros(self.nthetatot, 'float64')
    self.a2r = math.pi / (180. * 60.)

    # define an array of theta's
    for it in xrange(self.nthetatot):
        self.theta[it] = thetamin * math.exp(self.dlntheta * it)

    ################################################################
    # discrete l values used in the integral to convert C_l to xi's
    ################################################################

    # l = x / theta / self.a2r
    # x = l * theta * self.a2r
    #
    # We start by considering the largest theta, theta[-1], and for that
    # value we infer a list of l's from the requirement that corresponding
    # x values are spaced linearly with a given stepsize, until xmax. Then
    # we loop over smaller theta values, in decreasing order, and for each
    # of them we complete the previous list of l's, always requiring the
    # same dx stepsize (so that dl does vary) up to xmax.
    #
    # We first apply this to a running value ll, in order to count the total
    # number of ll's, called nl. Then we create the array lll[nl] and fill it
    # with the same values.
    #
    # We also compute on the fly the critical index il_max[it] such that
    # ll[il_max[it]]*self.theta[it]*self.a2r is the first value of x above xmax

    ll = 1.
    il = 0
    while (ll * self.theta[-1] * self.a2r < self.dx_threshold):
        ll += self.dx_below_threshold / self.theta[-1] / self.a2r
        il += 1
    for it in xrange(self.nthetatot):
        while (ll * self.theta[self.nthetatot - 1 - it] * self.a2r
               < self.xmax) and \
              (ll + self.dx_above_threshold
               / self.theta[self.nthetatot - 1 - it] / self.a2r < self.lmax):
            ll += self.dx_above_threshold \
                / self.theta[self.nthetatot - 1 - it] / self.a2r
            il += 1
    self.nl = il + 1

    self.lll = np.zeros(self.nl, 'float64')
    self.il_max = np.zeros(self.nthetatot, 'int')
    il = 0
    self.lll[il] = 1.
    while (self.lll[il] * self.theta[-1] * self.a2r < self.dx_threshold):
        il += 1
        self.lll[il] = self.lll[il - 1] \
            + self.dx_below_threshold / self.theta[-1] / self.a2r
    for it in xrange(self.nthetatot):
        while (self.lll[il] * self.theta[self.nthetatot - 1 - it] * self.a2r
               < self.xmax) and \
              (self.lll[il] + self.dx_above_threshold
               / self.theta[self.nthetatot - 1 - it] / self.a2r < self.lmax):
            il += 1
            self.lll[il] = self.lll[il - 1] + self.dx_above_threshold \
                / self.theta[self.nthetatot - 1 - it] / self.a2r
        self.il_max[self.nthetatot - 1 - it] = il

    # finally we compute the array l*dl that will be used in the trapezoidal
    # integration (l is a factor in the integrand [l * C_l * Bessel], and dl
    # is like a weight)
    self.ldl = np.zeros(self.nl, 'float64')
    self.ldl[0] = self.lll[0] * 0.5 * (self.lll[1] - self.lll[0])
    for il in xrange(1, self.nl - 1):
        self.ldl[il] = self.lll[il] * 0.5 * (self.lll[il + 1] - self.lll[il - 1])
    self.ldl[-1] = self.lll[-1] * 0.5 * (self.lll[-1] - self.lll[-2])

    #####################################################################
    # Allocation of various arrays filled and used in the function loglkl
    #####################################################################

    self.r = np.zeros(self.nzmax, 'float64')
    self.dzdr = np.zeros(self.nzmax, 'float64')
    self.g = np.zeros((self.nzmax, self.nzbins), 'float64')
    self.pk = np.zeros((self.nlmax, self.nzmax), 'float64')
    self.k_sigma = np.zeros(self.nzmax, 'float64')
    self.alpha = np.zeros((self.nlmax, self.nzmax), 'float64')
    if 'epsilon' in self.use_nuisance:
        self.E_th_nu = np.zeros((self.nlmax, self.nzmax), 'float64')
    self.Cl_integrand = np.zeros((self.nzmax, self.nzcorrs), 'float64')
    self.Cl = np.zeros((self.nlmax, self.nzcorrs), 'float64')
    '''
    if self.theoretical_error != 0:
        self.El_integrand = np.zeros((self.nzmax, self.nzcorrs), 'float64')
        self.El = np.zeros((self.nlmax, self.nzcorrs), 'float64')
    '''
    self.spline_Cl = np.empty(self.nzcorrs, dtype=(list, 3))
    self.xi1 = np.zeros((self.nthetatot, self.nzcorrs), 'float64')
    self.xi2 = np.zeros((self.nthetatot, self.nzcorrs), 'float64')
    self.Cll = np.zeros((self.nzcorrs, self.nl), 'float64')
    self.BBessel0 = np.zeros(self.nl, 'float64')
    self.BBessel4 = np.zeros(self.nl, 'float64')
    self.xi1_theta = np.empty(self.nzcorrs, dtype=(list, 3))
    self.xi2_theta = np.empty(self.nzcorrs, dtype=(list, 3))
    self.xi = np.zeros(np.size(self.xi_obs), 'float64')

    return
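# Hedged sketch (added here): a typical way the precomputed Cholesky factor
# above is then used in loglkl. Computing chi^2 via a triangular solve avoids
# forming the inverse covariance explicitly; the function name and usage are
# ours, not necessarily the shipped loglkl.
import numpy as np
from scipy.linalg import cholesky, solve_triangular

def chi2_via_cholesky(covmat, residual):
    # chi^2 = r^T C^{-1} r computed as |L^{-1} r|^2, with C = L L^T
    L = cholesky(covmat, lower=True)
    y = solve_triangular(L, residual, lower=True)
    return np.dot(y, y)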
def loglkl(self, cosmo, data):

    # One wants to obtain here the relation between z and r, this is done
    # by asking the cosmological module with the function z_of_r
    self.r = np.zeros(self.nzmax, 'float64')
    self.dzdr = np.zeros(self.nzmax, 'float64')
    self.r, self.dzdr = cosmo.z_of_r(self.z)

    # Compute now the selection function eta(r) = eta(z) dz/dr normalized
    # to one. The np.newaxis helps to broadcast the one-dimensional array
    # dzdr to the proper shape. Note that eta_norm is also broadcasted as
    # an array of the same shape as eta_z
    self.eta_r = self.eta_z * (self.dzdr[:, np.newaxis] / self.eta_norm)

    # Compute function g_i(r), that depends on r and the bin
    # g_i(r) = 2r(1+z(r)) int_r^+\infty drs eta_r(rs) (rs-r)/rs
    # (the sum below starts at index nr, i.e. the integral runs from r, as
    # appropriate for the lensing efficiency)
    g = np.zeros((self.nzmax, self.nbin), 'float64')
    for Bin in xrange(self.nbin):
        for nr in xrange(1, self.nzmax - 1):
            fun = self.eta_r[nr:, Bin] * (self.r[nr:] - self.r[nr]) / self.r[nr:]
            g[nr, Bin] = np.sum(0.5 * (fun[1:] + fun[:-1])
                                * (self.r[nr + 1:] - self.r[nr:-1]))
            g[nr, Bin] *= 2. * self.r[nr] * (1. + self.z[nr])

    # Get power spectrum P(k=l/r,z(r)) from cosmological module
    pk = np.zeros((self.nlmax, self.nzmax), 'float64')
    for index_l in xrange(self.nlmax):
        for index_z in xrange(1, self.nzmax):
            if (self.l[index_l] / self.r[index_z] > self.k_max):
                raise io_mp.LikelihoodError(
                    "you should increase euclid_lensing.k_max up to at "
                    "least %g" % (self.l[index_l] / self.r[index_z]))
            pk[index_l, index_z] = cosmo.pk(
                self.l[index_l] / self.r[index_z], self.z[index_z])

    # Recover the non_linear scale computed by halofit. If no scale was
    # affected, set the scale to one, and make sure that the nuisance
    # parameter epsilon is set to zero
    k_sigma = np.zeros(self.nzmax, 'float64')
    if (cosmo.nonlinear_method == 0):
        k_sigma[:] = 1.e6
    else:
        k_sigma = cosmo.nonlinear_scale(self.z, self.nzmax)

    # Define the alpha function, that will characterize the theoretical
    # uncertainty. Chosen to be 0.001 at low k, raising between 0.1 and 0.2
    # to self.theoretical_error
    alpha = np.zeros((self.nlmax, self.nzmax), 'float64')
    # self.theoretical_error = 0.1
    if self.theoretical_error != 0:
        for index_l in range(self.nlmax):
            k = self.l[index_l] / self.r[1:]
            alpha[index_l, 1:] = np.log(1. + k[:] / k_sigma[1:]) / (
                1. + np.log(1. + k[:] / k_sigma[1:])) * self.theoretical_error

    # recover the e_th_nu part of the error function
    e_th_nu = self.coefficient_f_nu * cosmo.Omega_nu / cosmo.Omega_m()

    # Compute the Error E_th_nu function and add it, with the nuisance
    # parameter epsilon, to P_nl_th (only if the nuisance parameter exists)
    if 'epsilon' in self.use_nuisance:
        E_th_nu = np.zeros((self.nlmax, self.nzmax), 'float64')
        for index_l in range(1, self.nlmax):
            E_th_nu[index_l, :] = np.log(
                1. + self.l[index_l] / (k_sigma[:] * self.r[:])) / (
                1. + np.log(1. + self.l[index_l]
                            / (k_sigma[:] * self.r[:]))) * e_th_nu
        for index_l in range(self.nlmax):
            epsilon = data.mcmc_parameters['epsilon']['current'] * (
                data.mcmc_parameters['epsilon']['scale'])
            pk[index_l, :] *= (1. + epsilon * E_th_nu[index_l, :])

    # Start loop over l for computation of C_l^shear
    Cl_integrand = np.zeros((self.nzmax, self.nbin, self.nbin), 'float64')
    Cl = np.zeros((self.nlmax, self.nbin, self.nbin), 'float64')
    # Start loop over l for computation of E_l
    if self.theoretical_error != 0:
        El_integrand = np.zeros((self.nzmax, self.nbin, self.nbin), 'float64')
        El = np.zeros((self.nlmax, self.nbin, self.nbin), 'float64')

    for nl in xrange(self.nlmax):
        # find Cl_integrand = (g(r) / r)**2 * P(l/r,z(r))
        for Bin1 in xrange(self.nbin):
            for Bin2 in xrange(self.nbin):
                Cl_integrand[1:, Bin1, Bin2] = g[1:, Bin1] * g[1:, Bin2] / (
                    self.r[1:]**2) * pk[nl, 1:]
                if self.theoretical_error != 0:
                    El_integrand[1:, Bin1, Bin2] = g[1:, Bin1] * (
                        g[1:, Bin2]) / (self.r[1:]**2) * pk[nl, 1:] * alpha[nl, 1:]

        # Integrate over r to get C_l^shear_ij = P_ij(l)
        # C_l^shear_ij = 9/16 Omega0_m^2 H_0^4
        #                \sum_0^rmax dr (g_i(r) g_j(r) / r**2) P(k=l/r, z(r))
        # It is then multiplied by 9/16*Omega_m**2 to be in units of Mpc**4
        # and then by (h/2997.9)**4 to be dimensionless
        for Bin1 in xrange(self.nbin):
            for Bin2 in xrange(self.nbin):
                Cl[nl, Bin1, Bin2] = np.sum(
                    0.5 * (Cl_integrand[1:, Bin1, Bin2]
                           + Cl_integrand[:-1, Bin1, Bin2])
                    * (self.r[1:] - self.r[:-1]))
                Cl[nl, Bin1, Bin2] *= 9. / 16. * (cosmo.Omega_m())**2
                Cl[nl, Bin1, Bin2] *= (cosmo.h() / 2997.9)**4
                if self.theoretical_error != 0:
                    El[nl, Bin1, Bin2] = np.sum(
                        0.5 * (El_integrand[1:, Bin1, Bin2]
                               + El_integrand[:-1, Bin1, Bin2])
                        * (self.r[1:] - self.r[:-1]))
                    El[nl, Bin1, Bin2] *= 9. / 16. * (cosmo.Omega_m())**2
                    El[nl, Bin1, Bin2] *= (cosmo.h() / 2997.9)**4
                if Bin1 == Bin2:
                    Cl[nl, Bin1, Bin2] += self.noise

    # Write fiducial model spectra if needed (exit in that case)
    if self.fid_values_exist is False:
        # Store the values now, and exit.
        fid_file_path = os.path.join(self.data_directory, self.fiducial_file)
        with open(fid_file_path, 'w') as fid_file:
            fid_file.write('# Fiducial parameters')
            for key, value in data.mcmc_parameters.items():
                fid_file.write(', %s = %.5g'
                               % (key, value['current'] * value['scale']))
            fid_file.write('\n')
            for nl in range(self.nlmax):
                for Bin1 in range(self.nbin):
                    for Bin2 in range(self.nbin):
                        fid_file.write("%.8g\n" % Cl[nl, Bin1, Bin2])
        print('\n\n /|\ Writing fiducial model in {0}'.format(fid_file_path))
        print('/_o_\ for {0} likelihood'.format(self.name))
        return 1j

    # Now that the fiducial model is stored, we add the El to both Cl and
    # Cl_fid (we create a new array, otherwise we would modify the
    # self.Cl_fid from one step to the other)

    # Spline Cl[nl,Bin1,Bin2] along l
    spline_Cl = np.empty((self.nbin, self.nbin), dtype=(list, 3))
    for Bin1 in xrange(self.nbin):
        for Bin2 in xrange(Bin1, self.nbin):
            spline_Cl[Bin1, Bin2] = list(itp.splrep(self.l, Cl[:, Bin1, Bin2]))
            if Bin2 > Bin1:
                spline_Cl[Bin2, Bin1] = spline_Cl[Bin1, Bin2]

    # Spline El[nl,Bin1,Bin2] along l
    if self.theoretical_error != 0:
        spline_El = np.empty((self.nbin, self.nbin), dtype=(list, 3))
        for Bin1 in xrange(self.nbin):
            for Bin2 in xrange(Bin1, self.nbin):
                spline_El[Bin1, Bin2] = list(
                    itp.splrep(self.l, El[:, Bin1, Bin2]))
                if Bin2 > Bin1:
                    spline_El[Bin2, Bin1] = spline_El[Bin1, Bin2]

    # Spline Cl_fid[nl,Bin1,Bin2] along l
    spline_Cl_fid = np.empty((self.nbin, self.nbin), dtype=(list, 3))
    for Bin1 in xrange(self.nbin):
        for Bin2 in xrange(Bin1, self.nbin):
            spline_Cl_fid[Bin1, Bin2] = list(
                itp.splrep(self.l, self.Cl_fid[:, Bin1, Bin2]))
            if Bin2 > Bin1:
                spline_Cl_fid[Bin2, Bin1] = spline_Cl_fid[Bin1, Bin2]

    # Compute likelihood
    # Prepare interpolation for every integer value of l, from the array
    # self.l, to finally compute the likelihood (sum over all l's)
    dof = 1. / (int(self.l[-1]) - int(self.l[0]) + 1)
    ells = range(int(self.l[0]), int(self.l[-1]) + 1)

    # Define cov theory, observ and error on the whole integer range of
    # ell values
    Cov_theory = np.zeros((len(ells), self.nbin, self.nbin), 'float64')
    Cov_observ = np.zeros((len(ells), self.nbin, self.nbin), 'float64')
    Cov_error = np.zeros((len(ells), self.nbin, self.nbin), 'float64')

    for Bin1 in xrange(self.nbin):
        for Bin2 in xrange(Bin1, self.nbin):
            Cov_theory[:, Bin1, Bin2] = itp.splev(ells, spline_Cl[Bin1, Bin2])
            Cov_observ[:, Bin1, Bin2] = itp.splev(ells,
                                                  spline_Cl_fid[Bin1, Bin2])
            if self.theoretical_error > 0:
                Cov_error[:, Bin1, Bin2] = itp.splev(ells,
                                                     spline_El[Bin1, Bin2])
            if Bin2 > Bin1:
                Cov_theory[:, Bin2, Bin1] = Cov_theory[:, Bin1, Bin2]
                Cov_observ[:, Bin2, Bin1] = Cov_observ[:, Bin1, Bin2]
                Cov_error[:, Bin2, Bin1] = Cov_error[:, Bin1, Bin2]

    chi2 = 0.
    # TODO: parallelize this
    for index, ell in enumerate(ells):
        det_theory = np.linalg.det(Cov_theory[index, :, :])
        det_observ = np.linalg.det(Cov_observ[index, :, :])
        if (self.theoretical_error > 0):
            det_cross_err = 0
            for i in range(self.nbin):
                newCov = np.copy(Cov_theory[index, :, :])
                newCov[:, i] = Cov_error[index, :, i]
                det_cross_err += np.linalg.det(newCov)

            # Newton method
            # Find starting point for the method:
            start = 0
            step = 0.001 * det_theory / det_cross_err
            error = 1
            old_chi2 = -1. * data.boundary_loglike
            error_tol = 0.01
            epsilon_l = start
            while error > error_tol:
                vector = np.array([epsilon_l - step, epsilon_l,
                                   epsilon_l + step])
                # Computing the function on three neighbouring points
                function_vector = np.zeros(3, 'float64')
                for k in range(3):
                    Cov_theory_plus_error = Cov_theory[index, :, :] \
                        + vector[k] * Cov_error[index, :, :]
                    det_theory_plus_error = np.linalg.det(
                        Cov_theory_plus_error)
                    det_theory_plus_error_cross_obs = 0
                    for i in range(self.nbin):
                        newCov = np.copy(Cov_theory_plus_error)
                        newCov[:, i] = Cov_observ[index, :, i]
                        det_theory_plus_error_cross_obs += np.linalg.det(
                            newCov)
                    function_vector[k] = (2. * ell + 1.) * self.fsky * (
                        det_theory_plus_error_cross_obs
                        / det_theory_plus_error
                        + math.log(det_theory_plus_error / det_observ)
                        - self.nbin) + dof * vector[k]**2

                # Computing first and second derivatives
                first_d = (function_vector[2] - function_vector[0]) \
                    / (vector[2] - vector[0])
                second_d = (function_vector[2] + function_vector[0]
                            - 2 * function_vector[1]) \
                    / (vector[2] - vector[1])**2

                # Updating point and error
                epsilon_l = vector[1] - first_d / second_d
                error = abs(function_vector[1] - old_chi2)
                old_chi2 = function_vector[1]
            # End Newton

            Cov_theory_plus_error = Cov_theory[index, :, :] \
                + epsilon_l * Cov_error[index, :, :]
            det_theory_plus_error = np.linalg.det(Cov_theory_plus_error)
            det_theory_plus_error_cross_obs = 0
            for i in range(self.nbin):
                newCov = np.copy(Cov_theory_plus_error)
                newCov[:, i] = Cov_observ[index, :, i]
                det_theory_plus_error_cross_obs += np.linalg.det(newCov)
            chi2 += (2. * ell + 1.) * self.fsky * (
                det_theory_plus_error_cross_obs / det_theory_plus_error
                + math.log(det_theory_plus_error / det_observ)
                - self.nbin) + dof * epsilon_l**2
        else:
            det_cross = 0.
            for i in xrange(self.nbin):
                newCov = np.copy(Cov_theory[index, :, :])
                newCov[:, i] = Cov_observ[index, :, i]
                det_cross += np.linalg.det(newCov)
            chi2 += (2. * ell + 1.) * self.fsky * (
                det_cross / det_theory
                + math.log(det_theory / det_observ) - self.nbin)

    # Finally add a gaussian prior on the epsilon nuisance parameter,
    # if present
    if 'epsilon' in self.use_nuisance:
        epsilon = data.mcmc_parameters['epsilon']['current'] * \
            data.mcmc_parameters['epsilon']['scale']
        chi2 += epsilon**2

    return -chi2 / 2.
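# Hedged sketch (added here): the per-ell term of the determinant-based
# Gaussian likelihood used in the no-theoretical-error branch above, as a
# standalone function. d_cross sums determinants of the theory matrix with
# one column at a time replaced by the observed one; the function name and
# toy usage are ours.
import numpy as np

def chi2_per_ell(ell, fsky, cov_theory, cov_observ):
    # cov_theory, cov_observ: (nbin, nbin) covariance matrices at this ell
    nbin = cov_theory.shape[0]
    det_theory = np.linalg.det(cov_theory)
    det_observ = np.linalg.det(cov_observ)
    det_cross = 0.
    for i in range(nbin):
        new_cov = np.copy(cov_theory)
        new_cov[:, i] = cov_observ[:, i]
        det_cross += np.linalg.det(new_cov)
    return (2. * ell + 1.) * fsky * (
        det_cross / det_theory + np.log(det_theory / det_observ) - nbin)

# toy usage: chi2_per_ell(10, 0.375, np.eye(2) * 1.1, np.eye(2))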