def test_group():

    conv_ensemble_sparse = Ensemble.fromfilelist(map_list)
    conv_ensemble_sparse.load(callback_loader=default_callback_loader, pool=pool, l_edges=l_edges)
    conv_ensemble_sparse.group(group_size=2, kind="sparse")

    assert conv_ensemble_sparse.num_realizations == 2

    conv_ensemble_contiguous = Ensemble.fromfilelist(map_list)
    conv_ensemble_contiguous.load(callback_loader=default_callback_loader, pool=pool, l_edges=l_edges)
    conv_ensemble_contiguous.group(group_size=2, kind="contiguous")

    assert conv_ensemble_contiguous.num_realizations == 2

    fig, ax = plt.subplots()

    for n in range(conv_ensemble.num_realizations):
        ax.plot(l, l * (l + 1) * conv_ensemble.data[n] / (2.0 * np.pi), label="Original {0}".format(n + 1), linestyle="-")

    for n in range(conv_ensemble_sparse.num_realizations):
        ax.plot(l, l * (l + 1) * conv_ensemble_sparse.data[n] / (2.0 * np.pi), label="Sparse {0}".format(n + 1), linestyle="--")

    for n in range(conv_ensemble_contiguous.num_realizations):
        ax.plot(l, l * (l + 1) * conv_ensemble_contiguous.data[n] / (2.0 * np.pi), label="Contiguous {0}".format(n + 1), linestyle="-.")

    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$l$")
    ax.set_ylabel(r"$l(l+1)P_l/2\pi$")
    ax.legend(loc="upper left", prop={"size": 7})

    plt.savefig("power_ensemble_grouped.png")
    plt.clf()

    return conv_ensemble_sparse._scheme, conv_ensemble_contiguous._scheme
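For reference, default_callback_loader is not defined in this section. A minimal sketch of what such a callback might look like (an assumption, not the original definition), using the lenstools ConvergenceMap.load constructor and its powerSpectrum method:

#Sketch only: load one map and return its measured power spectrum,
#which Ensemble.load stacks into one row of the ensemble data array
from lenstools import ConvergenceMap

def default_callback_loader(filename, l_edges):
    conv_map = ConvergenceMap.load(filename)
    l, Pl = conv_map.powerSpectrum(l_edges)
    return Pl

Under this assumption, Ensemble.load calls the callback once per entry of map_list and stacks the returned vectors into conv_ensemble.data.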
def test_add():

    conv_ensemble1 = Ensemble.fromfilelist(map_list[0:2])
    conv_ensemble2 = Ensemble.fromfilelist(map_list[2:])

    conv_ensemble1.load(callback_loader=default_callback_loader, pool=None, l_edges=l_edges)
    conv_ensemble2.load(callback_loader=default_callback_loader, pool=None, l_edges=l_edges)

    conv_ensemble_union = conv_ensemble1 + conv_ensemble2

    assert conv_ensemble_union.num_realizations == 4
    assert len(conv_ensemble_union.file_list) == 4
    assert conv_ensemble_union.data.shape[0] == 4
    assert conv_ensemble_union.data.shape[1] == conv_ensemble1.data.shape[1]
def test_index():

    #Decide the statistical descriptors to measure, and build an index
    idx = Indexer.stack([
        PowerSpectrum(l_edges),
        Peaks(thresholds_pk, norm=True),
        MinkowskiAll(thresholds_mf, norm=True),
        PDF(thresholds_mf, norm=True),
        Moments(connected=True)
    ])

    l = idx[0].l
    v = idx[1].midpoints
    v_mf = idx[2].midpoints

    #Initialize the statistical ensemble
    ens = Ensemble.fromfilelist(map_list)

    #Load measurements into the ensemble (this is the expensive part!!!)
    ens.load(callback_loader=convergence_measure_all, pool=None, index=idx)

    #Split the ensemble into power spectrum, peaks, PDF, the third Minkowski functional and moments
    mink_idx = idx[2].separate()
    subset_idx = Indexer([idx[0], idx[1], idx[3], mink_idx[2], idx[-1]])

    ens_pow, ens_pk, ens_pdf, ens_mink2, ens_mom = ens.split(subset_idx)

    #####################################################################
    #Plot to check
    fig, ax = plt.subplots(2, 2, figsize=(16, 16))
    for i in range(ens.num_realizations):
        ax[0, 0].plot(l, l * (l + 1) * ens_pow.data[i] / (2.0 * np.pi))
        ax[0, 1].plot(v, ens_pk.data[i])
        ax[1, 0].plot(v_mf, ens_pdf.data[i])
        ax[1, 1].plot(v_mf, ens_mink2.data[i])

    ax[0, 0].set_xscale("log")
    ax[0, 0].set_yscale("log")
    ax[0, 0].set_xlabel(r"$l$")
    ax[0, 0].set_ylabel(r"$l(l+1)P_l/2\pi$")
    ax[0, 1].set_xlabel(r"$\nu$")
    ax[0, 1].set_ylabel(r"$dN/d\nu$")
    ax[1, 0].set_xlabel(r"$\nu$")
    ax[1, 0].set_ylabel(r"$P(\nu)$")
    ax[1, 1].set_xlabel(r"$\nu$")
    ax[1, 1].set_ylabel(r"$V_2(\nu)$")

    fig.tight_layout()
    plt.savefig("conv_all.png")
    plt.clf()

    #Save moments to check
    np.savetxt("moments.txt", ens_mom.mean())
def test_chi2():

    conv_ensemble1 = Ensemble.fromfilelist(map_list[0:2])
    conv_ensemble1.load(callback_loader=default_callback_loader, pool=None, l_edges=l_edges)

    print("chi2 difference = {0}".format(conv_ensemble.compare(conv_ensemble1)))
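Ensemble.compare is used here as a black box; presumably it returns a chi-squared distance between the two ensemble means, weighted by the inverse covariance. A rough numpy equivalent under that assumption (not the library's actual implementation):

import numpy as np

def chi2_between(ens1, ens2):
    #Difference of the mean feature vectors, weighted by the inverse
    #covariance estimated from the first ensemble (assumed convention)
    diff = ens1.mean() - ens2.mean()
    icov = np.linalg.inv(ens1.covariance())
    return diff.dot(icov).dot(diff)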
def test_multiply():

    conv_ensemble_peaks = Ensemble.fromfilelist(map_list)
    conv_ensemble_peaks.load(callback_loader=peaks_loader, pool=None, thresholds=thresholds_pk)

    conv_ensemble_both = conv_ensemble * conv_ensemble_peaks

    assert conv_ensemble_both.num_realizations == 4
    assert conv_ensemble_both.data.shape[0] == 4
    assert conv_ensemble_both.data.shape[1] == len(l_edges) + len(thresholds_pk) - 2
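peaks_loader is likewise defined outside this section; a plausible sketch, assuming a ConvergenceMap.peakCount(thresholds, norm=...) method:

#Sketch only (assumed helper): one peak-count histogram per map realization
from lenstools import ConvergenceMap

def peaks_loader(filename, thresholds):
    conv_map = ConvergenceMap.load(filename)
    v, pk = conv_map.peakCount(thresholds, norm=True)
    return pk

The * operator presumably concatenates features horizontally, which is why the expected width is (len(l_edges) - 1) + (len(thresholds_pk) - 1) = len(l_edges) + len(thresholds_pk) - 2 bins.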
def measure_hough(self, pool=None, threshold=0.1, bins=np.linspace(0.0, 0.0014, 50), save_type="npy"):

    assert self.measurer == igs1_measure_hough
    realizations = range(1, self.nrealizations + 1)

    #Build the ensemble
    ens = Ensemble.fromfilelist(realizations)

    #Load the data into the ensemble by calling the measurer on each map
    ens.load(callback_loader=self.measurer, pool=pool, model=self.model, redshift=self.redshift, big_fiducial_set=self.big_fiducial_set, threshold=threshold, bins=bins)

    #Save
    savename = self.savename(descriptor="hough")
    logging.info("Saving hough histograms to {0}".format(savename))
    ens.save(savename)
def measure_all_histograms(models, options, pool):

    #Look at a sample map
    sample_map = ConvergenceMap.fromfilename(models[0].getNames(z=1.0, realizations=[1])[0], loader=load_fits_default_convergence)

    #Initialize Gaussian shape noise generator for the sample map shape and angle
    generator = GaussianNoiseGenerator.forMap(sample_map)

    #Parsed from options
    num_realizations = options.getint("analysis", "num_realizations")
    smoothing_scales = [float(scale) for scale in options.get("analysis", "smoothing_scales").split(",")]
    bin_edges = np.ogrid[options.getfloat("analysis", "bin_edge_low"):options.getfloat("analysis", "bin_edge_high"):(options.getint("analysis", "num_bins") - 2) * 1j]
    bin_edges = np.hstack((-10.0, bin_edges, 10.0))
    z = options.getfloat("analysis", "redshift")

    bin_midpoints = 0.5 * (bin_edges[1:] + bin_edges[:-1])

    #Create smoothing scale index for the histograms
    idx = Indexer.stack([PDF(bin_edges) for scale in smoothing_scales])

    #Build the data type of the structure array in output
    data_type = [(model.name, Ensemble) for model in models]
    #Append info about the smoothing scale
    data_type = [("Smooth", np.float),] + data_type

    #Create output struct array
    ensemble_array = np.zeros(len(smoothing_scales), dtype=data_type)

    #Write smoothing scale information
    ensemble_array["Smooth"] = np.array(smoothing_scales)

    #The for loop runs the distributed computations
    for model in models:

        #Build Ensemble instance with the maps to analyze
        map_ensemble = Ensemble.fromfilelist(range(1, num_realizations + 1))

        #Measure the histograms and load the data in the ensemble
        map_ensemble.load(callback_loader=compute_map_histograms, pool=pool, simulation_set=model, smoothing_scales=smoothing_scales, index=idx, generator=generator, bin_edges=bin_edges, redshift=z)

        #Split the ensemble between different smoothing scales
        map_ensemble_list = map_ensemble.split(idx)

        #Add to output struct array
        ensemble_array[model.name] = np.array(map_ensemble_list)

    return ensemble_array
def measure(self, pool=None):
    """
    Measures the features specified in the Indexer for all the maps whose names are calculated by get_all_map_names;
    saves the ensemble results in numpy array format

    """

    #Build the ensemble
    ens = Ensemble.fromfilelist(self.map_names)

    #Load the data into the ensemble by calling the measurer on each map
    ens.load(callback_loader=self.measurer, pool=pool, **self.kwargs)

    #Break the ensemble into sub-ensembles, one for each feature
    single_feature_ensembles = ens.split(self.kwargs["index"])

    #For each of the sub-ensembles, save it in the appropriate directory
    for n, ensemble in enumerate(single_feature_ensembles):
        ensemble.save(os.path.join(self.full_save_path, self.kwargs["index"][n].name) + ".npy")
def test_differentiate():

    thresholds = np.arange(-0.04, 0.12, 0.001)
    midpoints = 0.5 * (thresholds[:-1] + thresholds[1:])
    index = Indexer.stack([MinkowskiAll(thresholds)])
    index_separate = Indexer(MinkowskiAll(thresholds).separate())

    diff_ensemble = Ensemble.fromfilelist(map_list)
    diff_ensemble.load(callback_loader=convergence_measure_all, index=index)

    ensemble_0 = diff_ensemble.split(index_separate)[0]
    ensemble_pdf = ensemble_0.differentiate(step=thresholds[0] - thresholds[1])

    fig, ax = plt.subplots()

    for i in range(ensemble_0.num_realizations):
        ax.plot(0.5 * (midpoints[:-1] + midpoints[1:]), ensemble_pdf[i])

    ax.set_xlabel(r"$\kappa$")
    ax.set_ylabel(r"$P(\kappa)$")

    fig.savefig("ensemble_differentiate.png")
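The differentiate call relies on the zeroth Minkowski functional V_0(nu) being the excursion-set area fraction above threshold nu, so a finite-difference derivative along the threshold axis recovers the one-point PDF (up to sign). A plain numpy sketch of the same operation, with hypothetical argument names:

import numpy as np

def numerical_pdf(v0, step):
    #Finite differences of V_0 along the threshold axis; v0 has shape
    #(num_realizations, num_thresholds), step is the threshold spacing
    return np.diff(v0, axis=-1) / step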
def measure(self, pool=None, save_type="npy"):
    """
    Measures the features specified in the Indexer for all the maps whose names are calculated by get_all_map_names;
    saves the ensemble results in numpy array format

    """

    realizations = range(1, self.nrealizations + 1)

    #Build the ensemble
    ens = Ensemble.fromfilelist(realizations)

    #Load the data into the ensemble by calling the measurer on each map
    ens.load(callback_loader=self.measurer, pool=pool, model=self.model, index=self.index, mask_filename=None, redshift=self.redshift, big_fiducial_set=self.big_fiducial_set)

    #Break the ensemble into sub-ensembles, one for each feature
    single_feature_ensembles = ens.split(self.index)

    #For each of the sub-ensembles, save it in the appropriate directory
    for n, ensemble in enumerate(single_feature_ensembles):
        savename = self.savename(descriptor=self.index[n])
        logging.debug("Saving features to {0}".format(savename))
        ensemble.save(savename)
for subfield in subfields:

    fig_power, ax_power = plt.subplots()
    fig_peaks, ax_peaks = plt.subplots()
    fig_minkowski, ax_minkowski = plt.subplots(1, 3, figsize=(24, 8))

    #Plot for the simulations
    for model in training_models + [observed_model]:

        m = Measurement(model=model, options=options, subfield=subfield, smoothing_scale=smoothing_scale, measurer=None)
        m.get_all_map_names()

        #Load the features and plot

        #Power spectrum
        ensemble_power = Ensemble.fromfilelist([os.path.join(m.full_save_path, "power_spectrum.npy")])
        ensemble_power.load(from_old=True)
        P = ensemble_power.mean()

        if type(model) == CFHTLens:
            ax_power.plot(l, l * (l + 1) * P / (2 * np.pi), linestyle="--", color="black")
        elif type(model) == CFHTemu1:
            ax_power.plot(l, l * (l + 1) * P / (2 * np.pi))

        #Peaks
        ensemble_peaks = Ensemble.fromfilelist([os.path.join(m.full_save_path, "peaks.npy")])
        ensemble_peaks.load(from_old=True)
        pk = ensemble_peaks.mean()

        if type(model) == CFHTLens:
            ax_peaks.plot(v_pk, pk, linestyle="--", color="black")
pool = None

#The only parallelized part is the loading of the ensemble (that's the computationally expensive part)

if (pool is not None) and not(pool.is_master()):
    pool.wait()
    sys.exit(0)

map_list = ["Data/conv1.fit", "Data/conv2.fit", "Data/conv3.fit", "Data/conv4.fit"]
l_edges = np.arange(200.0, 50000.0, 200.0)
thresholds_pk = np.arange(-1.0, 5.0, 0.2)

l = 0.5 * (l_edges[:-1] + l_edges[1:])

conv_ensemble = Ensemble.fromfilelist(map_list)
conv_ensemble.load(callback_loader=default_callback_loader, pool=pool, l_edges=l_edges)

if pool is not None:
    pool.close()

def test_shape():

    assert conv_ensemble.num_realizations == len(map_list)
    assert conv_ensemble.data.shape == (len(map_list), len(l_edges) - 1)

def test_power_plot():

    fig, ax = plt.subplots()
    for n in range(conv_ensemble.num_realizations):
        ax.plot(l, l * (l + 1) * conv_ensemble[n] / (2.0 * np.pi), label="Map {0}".format(n + 1), linestyle="--")
def test_interpolation():

    root_path = "Data/all"
    analysis = LikelihoodAnalysis()

    #Read in model names
    models = CFHTemu1.getModels()[:17]
    assert len(models) == 17

    #Shuffle the models
    np.random.seed(1)
    np.random.shuffle(models)

    #Divide into training and testing
    training_models = models[:-1]
    testing_model = models[-1]

    #Read multipoles
    ell = np.load(os.path.join(root_path, "ell.npy"))

    #Load in the means of the power spectra of the 16 training models, and populate the analysis instance
    for model in training_models:

        ens = Ensemble.fromfilelist([os.path.join(root_path, model._cosmo_id_string, "subfield1", "sigma05", "power_spectrum.npy")])
        ens.load(from_old=True)
        analysis.add_model(parameters=model.squeeze(with_ns=True), feature=ens.mean())

    #Add the multipoles to the analysis
    analysis.add_feature_label(ell)
    l = analysis.feature_label

    ens = Ensemble.fromfilelist([os.path.join(root_path, testing_model._cosmo_id_string, "subfield1", "sigma05", "power_spectrum.npy")])
    ens.load(from_old=True)
    testing_Pl = ens.mean()

    #Load in also the observed power spectrum
    ens = Ensemble.fromfilelist([os.path.join(root_path, "observations", "subfield1", "sigma05", "power_spectrum.npy")])
    ens.load(from_old=True)
    observed_Pl = ens.mean()

    #Output the analysis stats
    np.savetxt("16_parameter_points.txt", analysis.parameter_set)

    for n in range(len(training_models)):
        plt.plot(l, l * (l + 1) * analysis.training_set[n] / (2 * np.pi))

    plt.plot(l, l * (l + 1) * observed_Pl / (2 * np.pi), linestyle="--", label="Observation")

    plt.xlabel(r"$l$")
    plt.ylabel(r"$l(l+1)P_l/2\pi$")
    plt.yscale("log")
    plt.legend(loc="upper left")
    plt.savefig("16_power_spectra.png")
    plt.clf()

    #Train the interpolators
    analysis.train(use_parameters=range(3))
    assert hasattr(analysis, "_interpolator")
    assert hasattr(analysis, "_num_bins")

    #Emulator portability test with pickle/unpickle
    analysis.save("analysis.p")
    emulator = LikelihoodAnalysis.load("analysis.p")

    #Predict the power spectrum at the remaining point
    predicted_Pl = emulator.predict(testing_model.squeeze())

    #Plot it against the measured one
    fig, ax = plt.subplots(2, 1, figsize=(16, 8))

    #Measured
    ax[0].plot(l, l * (l + 1) * testing_Pl / (2 * np.pi), label="measured")
    #Predicted
    ax[0].plot(l, l * (l + 1) * predicted_Pl / (2 * np.pi), label="interpolated")
    #Fractional difference
    ax[1].plot(l, (predicted_Pl - testing_Pl) / testing_Pl)

    ax[1].set_xlabel(r"$l$")
    ax[0].set_ylabel(r"$l(l+1)P_l/2\pi$")
    ax[1].set_ylabel(r"$(P_l^I-P_l^M)/P_l^M$")
    ax[0].set_yscale("log")
    ax[0].legend(loc="upper left")

    plt.savefig("power_interpolator_test.png")
    plt.clf()

    #Give it a shot with two points in parameter space to test vectorization
    two_parameter_points = np.array((training_models[0].squeeze(), testing_model.squeeze()))
    two_predicted_Pl = emulator.predict(two_parameter_points)

    fig, ax = plt.subplots(2, 1, figsize=(16, 8))

    #Predicted
    ax[0].plot(l, l * (l + 1) * two_predicted_Pl[0] / (2 * np.pi), color="red", linestyle="--")
    ax[0].plot(l, l * (l + 1) * two_predicted_Pl[1] / (2 * np.pi), color="green", linestyle="--")
    #Measured
    ax[0].plot(l, l * (l + 1) * emulator.training_set[0] / (2 * np.pi), color="red", linestyle="-")
    ax[0].plot(l, l * (l + 1) * testing_Pl / (2 * np.pi), color="green", linestyle="-")
    #Fractional difference
    ax[1].plot(l, (two_predicted_Pl[0] - emulator.training_set[0]) / emulator.training_set[0], color="red")
    ax[1].plot(l, (two_predicted_Pl[1] - testing_Pl) / testing_Pl, color="green")

    ax[1].set_xlabel(r"$l$")
    ax[0].set_ylabel(r"$l(l+1)P_l/2\pi$")
    ax[1].set_ylabel(r"$(P_l^I-P_l^M)/P_l^M$")
    ax[0].set_yscale("log")

    plt.savefig("power_interpolator_test_2.png")
    plt.clf()

    #Generate a fudge power spectrum covariance matrix
    covariance = np.diag(testing_Pl ** 2 / (0.5 + l))

    #Generate a fudge observation by wiggling the testing power spectrum
    observation = testing_Pl + np.random.uniform(low=-testing_Pl * 0.1, high=testing_Pl * 0.1)

    #Choose a bunch of points in parameter space
    points = emulator.parameter_set[:, :-1]

    #Compute the chi2
    chi2_values_1 = emulator.chi2(points, observation, covariance)
    chi2_values_2 = emulator.chi2(points, observation, covariance, split_chunks=4)
    assert chi2_values_1.shape == chi2_values_2.shape

    #Compute the individual contributions
    chi2_contributions = emulator.chi2Contributions(points[0], observation, covariance)

    #Plot
    plt.imshow(chi2_contributions, interpolation="nearest")
    plt.colorbar()
    plt.xlabel(r"$j$")
    plt.ylabel(r"$i$")
    plt.savefig("chi2_contributions.png")
    plt.clf()

    return chi2_values_1, chi2_values_2
pool = None

if (pool is not None) and not(pool.is_master()):
    pool.wait()
    sys.exit(0)

map_mock_ids = range(int(sys.argv[1]))

igs1_set = IGS1(root_path="/Users/andreapetri/Documents/Columbia/spurious_shear/convergence_maps")
map_igs1_ids = igs1_set.getNames(z=1.0, realizations=range(1, int(sys.argv[1]) + 1))

gen = GaussianNoiseGenerator(shape=(2048, 2048), side_angle=3.41 * deg, label="convergence")
power_func = np.loadtxt("Data/ee4e-7.txt", unpack=True)

ens_mock = Ensemble.fromfilelist(map_mock_ids)
ens_igs1 = Ensemble.fromfilelist(map_igs1_ids)

ens_mock.load(callback_loader=generate_and_measure, pool=pool, generator=gen, power_func=power_func)
ens_igs1.load(callback_loader=measure_from_IGS1, pool=pool)

if pool is not None:
    pool.close()

np.savetxt("moments_mock.txt", np.array([ens_mock.mean(), np.sqrt(ens_mock.covariance().diagonal())]))
np.savetxt("moments_igs1.txt", np.array([ens_igs1.mean(), np.sqrt(ens_igs1.covariance().diagonal())]))

logging.info("Done!")
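The callbacks generate_and_measure and measure_from_IGS1 are defined elsewhere in the original script. A hedged sketch of the first one, assuming GaussianNoiseGenerator.fromConvPower and ConvergenceMap.moments behave as their names suggest:

#Sketch under assumptions: draw one Gaussian mock convergence map with the
#tabulated E-mode power spectrum and return its connected moments
def generate_and_measure(map_id, generator, power_func):
    mock_map = generator.fromConvPower(power_func, seed=map_id)
    return mock_map.moments(connected=True)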
bin_midpoints = 0.5 * (bin_edges[1:] + bin_edges[:-1])

#Create smoothing scale index for the histogram
idx = Indexer.stack([PDF(bin_edges) for scale in smoothing_scales])

#Create IGS1 simulation set object to look for the right simulations
simulation_set = IGS1(root_path=root_path)

#Look at a sample map
sample_map = ConvergenceMap.load(simulation_set.getNames(z=1.0, realizations=[1])[0])

#Initialize Gaussian shape noise generator
generator = GaussianNoiseGenerator.forMap(sample_map)

#Build Ensemble instance with the maps to analyze
map_ensemble = Ensemble.fromfilelist(range(1, num_realizations + 1))

#Measure the histograms and load the data in the ensemble
map_ensemble.load(callback_loader=compute_histograms, pool=pool, simulation_set=simulation_set, smoothing_scales=smoothing_scales, index=idx, generator=generator, bin_edges=bin_edges)

if pool is not None:
    pool.close()

##########################################################################################
#Ensemble data available at this point for covariance, PCA, etc...
##########################################################################################

#Plot results to check
fig, ax = plt.subplots(len(smoothing_scales), 1)
for i in range(len(smoothing_scales)):
def load_features(self, model, save_new=False):

    #First create an empty ensemble
    ensemble_all_subfields = Ensemble()

    #Then cycle through all the subfields and gather the features for each one
    for subfield in self.subfields:

        #Dictionary that holds all the measurements
        m = dict()
        for smoothing_scale in self.smoothing_scales:
            m[smoothing_scale] = Measurement(model=model, options=self.options, subfield=subfield, smoothing_scale=smoothing_scale, measurer=None, mean_subtract=self.cmd_args.mean_subtract)
            m[smoothing_scale].get_all_map_names()

        #Construct one ensemble for each feature (with included smoothing scales) and load in the data
        ensemble_subfield = list()

        #Prevent randomness in dictionary keys
        features_to_measure = self.features_to_measure.keys()
        features_to_measure.sort()

        for feature_type in features_to_measure:

            for smoothing_scale in self.features_to_measure[feature_type]:

                #Construct the subfield/smoothing scale/feature specific ensemble
                ens_filename = os.path.join(m[smoothing_scale].full_save_path, npy_filename(feature_type))
                logging.info("Reading ensemble from {0}".format(ens_filename))
                ens = Ensemble.fromfilelist([ens_filename])
                ens.load(from_old=True)

                #Check if we want to cut out some of the peaks
                if self.cmd_args.cut_convergence and feature_type == "peaks":
                    new_thresholds = ens.cut(self.kappa_min, self.kappa_max, feature_label=self.kappa_peaks)
                    logging.log(DEBUG_PLUS, "Performed cut on the peaks convergence, new limits are {0},{1}".format(new_thresholds[0], new_thresholds[-1]))
                    if save_new:
                        logging.info("Saving new kappa values to {0}...".format(os.path.join(self.save_path, "th_new_peaks.npy")))
                        np.save(os.path.join(self.save_path, "th_new_peaks.npy"), new_thresholds)

                #Check the masked fraction of the field of view
                masked_fraction = self.masked_fraction[smoothing_scale][subfield]

                ###########################################################################################################
                #Scale to the non masked area: if we treat each subfield independently (i.e. group_subfields is False)
                #then we need to scale each subfield to the same area when considering the power spectrum and peaks;
                #if on the other hand we group subfields together, then the power spectrum and peaks are simply added
                #between subfields, but the MFs and the moments need to be scaled
                ###########################################################################################################

                if (self.cmd_args.mask_scale) and not(self.cmd_args.group_subfields):

                    if feature_type == "power_spectrum":
                        logging.log(DEBUG_PLUS, "Scaling power spectrum of subfield {0}, masked fraction {1}, multiplying by {2}".format(subfield, masked_fraction, 1.0 / (1.0 - masked_fraction)**2))
                        ens.scale(1.0 / (1.0 - masked_fraction)**2)
                    elif feature_type == "peaks":
                        logging.log(DEBUG_PLUS, "Scaling peak counts of subfield {0}, masked fraction {1}, multiplying by {2}".format(subfield, masked_fraction, 1.0 / (1.0 - masked_fraction)))
                        ens.scale(1.0 / (1.0 - masked_fraction))

                elif (self.cmd_args.mask_scale) and (self.cmd_args.group_subfields):

                    if "minkowski" in feature_type or "moments" in feature_type:
                        logging.log(DEBUG_PLUS, "Scaling {0} of subfield {1}, masked fraction {2}, multiplying by {3}".format(feature_type, subfield, masked_fraction, (1.0 - masked_fraction) / self.total_non_masked_fraction[smoothing_scale]))
                        ens.scale((1.0 - masked_fraction) / self.total_non_masked_fraction[smoothing_scale])

                #Regular expressions to parse the feature string
                num = re.match(r"minkowski_([0-2]+)", feature_type)
                momParse = re.match(r"moments(_[qsk][1-4]+)?(_[qsk][1-4]+)?(_[qsk][1-4]+)?", feature_type)
                assert (num is None) or (momParse is None)

                if num is not None:

                    ###############################################################################
                    ##MFs only: check if we want to discard some of the Minkowski functionals######
                    ###############################################################################

                    mink_to_measure = [int(n_mf) for n_mf in list(num.group(1))]
                    ens_split = ens.split(self.mink_idx)

                    #Differentiate Minkowski 0 to find the PDF?
                    if self.cmd_args.differentiate:
                        logging.log(DEBUG_PLUS, "Differentiating Minkowski 0 to get the PDF")
                        ens_split[0] = ens_split[0].differentiate(step=self.kappa_minkowski[0] - self.kappa_minkowski[1])

                    #Perform the convergence cut if option is enabled
                    if self.cmd_args.cut_convergence:
                        new_thresholds = [ens_split[n_mf].cut(self.kappa_min, self.kappa_max, feature_label=self.kappa_minkowski) for n_mf in mink_to_measure]
                        logging.log(DEBUG_PLUS, "Performed cut on the minkowski convergence, new limits are {0},{1}".format(new_thresholds[0][0], new_thresholds[0][-1]))
                        if save_new:
                            logging.info("Saving new kappa values to {0}...".format(os.path.join(self.save_path, "th_new_minkowski.npy")))
                            np.save(os.path.join(self.save_path, "th_new_minkowski.npy"), new_thresholds[0])

                    [ensemble_subfield.append(ens_split[n_mf]) for n_mf in mink_to_measure]

                elif momParse is not None:

                    ###############################################################################
                    ##Moments only: check if we want to keep only a subset of the moments##########
                    ###############################################################################

                    momGroups = momParse.groups()
                    mom_indices = list()

                    #Check, one by one, the moment indices
                    for gr in momGroups:

                        if gr is not None:
                            moment_type = gr[1]
                            moment_numbers = [int(n_mom) - 1 for n_mom in gr[2:]]

                            #Compute offset
                            if moment_type == "q":
                                mom_offset = 0
                            elif moment_type == "s":
                                mom_offset = 2
                            elif moment_type == "k":
                                mom_offset = 5
                            else:
                                raise ValueError("Only quadratic, cubic, quartic moments implemented!")

                            #Append indices to list
                            for mom_num in moment_numbers:
                                mom_indices.append(mom_offset + mom_num)

                    #Slice the ensemble accordingly
                    if len(mom_indices) > 0:
                        logging.info("Measuring moments {0}".format(mom_indices))
                        ens.cut(mom_indices)

                    #Append to subfield ensemble
                    ensemble_subfield.append(ens)

                else:

                    ensemble_subfield.append(ens)
                    if self.cmd_args.cut_convergence:
                        logging.log(DEBUG_PLUS, "Convergence cut on MFs not performed, select minkowski_012 instead of minkowski_all")

        #############################################################################################
        #Add the features to the cumulative subfield ensemble
        ensemble_all_subfields += reduce(mul, ensemble_subfield)

    #If option is specified, group all the subfields together, for each realization
    if self.cmd_args.group_subfields:
        logging.log(DEBUG_PLUS, "Taking means over the {0} subfields...".format(len(self.subfields)))
        ensemble_all_subfields.group(group_size=len(self.subfields), kind="sparse")

    #Return the created ensemble
    return ensemble_all_subfields