def test_exact_weights(self):
    """Check the pair summary of the first object against direct values.

    Five objects are laid out along dec = 0 with right ascensions evenly
    spaced from 0 to 500 arcseconds, all at redshift 2.0. The expected
    separations are the Planck15 comoving distance at that redshift times
    the angular offset from the first object, and the expected weights
    come from ``pair_maker.distance_weight`` applied to those separations.
    """
    n_points = 5
    ids = np.arange(n_points)
    ras = np.linspace(0, 500, n_points) / 3600
    decs = np.zeros(n_points)
    redshifts = np.full(n_points, 2.0)
    catalog = {"id": ids, "ra": ras, "dec": decs, "redshift": redshifts}

    maker = pair_maker.PairMaker(self.r_mins,
                                 self.r_maxes,
                                 self.z_min,
                                 self.z_max)
    output = maker.run(catalog, catalog)
    first_row = output.iloc[0]

    # The first object sits at ra = 0, so ``ras`` doubles as the angular
    # offset of every object from it.
    rs = Planck15.comoving_distance(2.0).value * np.radians(ras)
    weights = pair_maker.distance_weight(rs)

    for r_min, r_max in zip(self.r_mins, self.r_maxes):
        scale_name = "Mpc%.2ft%.2f" % (r_min, r_max)
        in_annulus = np.logical_and(rs > r_min, rs < r_max)
        expected_weights = weights[in_annulus]

        self.assertEqual(first_row["ref_id"], ids[0])
        self.assertEqual(first_row["redshift"], redshifts[0])
        self.assertEqual(first_row["%s_count" % scale_name],
                         len(expected_weights))
        self.assertAlmostEqual(first_row["%s_weight" % scale_name],
                               expected_weights.sum())
def _run_pair_maker(self):
    """Run PairMaker on regions 0 and 1 separately and merge the results.

    The catalog stored on the test case is split on its ``"region"``
    column; each sub-catalog is paired against itself with a single
    PairMaker configured to write raw pairs to ``self.output_path``.

    Returns
    -------
    pm_output : `pandas.DataFrame`
        Concatenated PairMaker output for both regions, indexed by
        ``"ref_id"``.
    """
    maker = pair_maker.PairMaker(self.r_mins,
                                 self.r_maxes,
                                 self.z_min,
                                 self.z_max,
                                 self.weight_power,
                                 output_pairs=self.output_path)
    columns = ("id", "ra", "dec", "redshift", "region")

    per_region = []
    for region in (0, 1):
        in_region = self.catalog["region"] == region
        sub_catalog = {name: self.catalog[name][in_region]
                       for name in columns}
        per_region.append(maker.run(sub_catalog, sub_catalog))

    return pd.concat(per_region).set_index("ref_id")
def test_output_file(self):
    """Test writing and loading from the output file.

    PairMaker is run with raw-pair output enabled, the parquet partitions
    written for region 0 are read back, and the counts and weights
    recomputed from the stored (compressed) distances are compared with
    the summary values returned by ``run``. The comparison is approximate:
    per-object differences are bounded relative to the summary count, and
    the mean fractional difference per scale must vanish to 3 places.
    """
    n_z_bins = 4
    pm = pair_maker.PairMaker(self.r_mins,
                              self.r_maxes,
                              self.z_min,
                              self.z_max,
                              n_write_proc=2,
                              output_pairs=self.output_path,
                              n_z_bins=n_z_bins)
    output = pm.run(self.catalog, self.catalog)
    output.set_index("ref_id", inplace=True)

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
    # z_bin=1..n_z_bins partitions in the same order with the same
    # defaults (index preserved, no column sorting).
    raw_pair_df = pd.concat([
        pd.read_parquet("%s/region=0/z_bin=%i" % (self.output_path, z_bin))
        for z_bin in range(1, n_z_bins + 1)])
    raw_pair_df.set_index("ref_id", inplace=True)

    for r_min, r_max in zip(self.r_mins, self.r_maxes):
        scale_name = "Mpc%.2ft%.2f" % (r_min, r_max)
        count_col = "%s_count" % scale_name
        weight_col = "%s_weight" % scale_name

        tot_pair_diff = 0
        tot_dist_diff = 0
        for ref_id, data_row in output.iterrows():
            raw_data = raw_pair_df.loc[ref_id]
            dists = pair_maker.decompress_distances(
                raw_data["comp_log_dist"])
            sub_dists = dists[np.logical_and(dists > r_min,
                                             dists < r_max)]
            n_pairs = len(sub_dists)
            dist_weight = pair_maker.distance_weight(sub_dists).sum()

            # These ratios can be non-finite when the summary value is
            # zero; such entries are skipped in the totals below.
            pair_diff = 1 - n_pairs / data_row[count_col]
            dist_diff = 1 - dist_weight / data_row[weight_col]

            if n_pairs == 0:
                self.assertEqual(n_pairs, data_row[count_col])
            else:
                self.assertLess(np.fabs(pair_diff),
                                2 / data_row[count_col])
            if dist_weight == 0:
                self.assertEqual(dist_weight, data_row[weight_col])
            else:
                self.assertLess(
                    np.fabs(dist_diff),
                    1 / data_row[count_col] * data_row[weight_col])

            if np.isfinite(pair_diff):
                tot_pair_diff += pair_diff
            if np.isfinite(dist_diff):
                tot_dist_diff += dist_diff

        # Averaged over all objects the fractional differences should
        # cancel for this scale.
        self.assertAlmostEqual(tot_pair_diff / self.n_objects, 0,
                               places=3)
        self.assertAlmostEqual(tot_dist_diff / self.n_objects, 0,
                               places=3)
def test_query_tree(self):
    """Check how many points a KD-tree query around the first point returns.

    Five points are placed along dec = 0 at right ascensions of 0, 125,
    250, 375 and 500 arcseconds. The distance handed to ``_query_tree``
    is chosen so that 10 / dist equals 450 arcseconds in radians; the
    query is expected to return the first four points and not the last.
    """
    from scipy.spatial import cKDTree

    maker = pair_maker.PairMaker([1], [10], self.z_min, self.z_max)

    ras = np.linspace(0, 500, 5) / 3600
    decs = np.zeros(5)
    vects = maker._convert_radec_to_xyz(np.radians(ras), np.radians(decs))
    tree = cKDTree(vects)

    # NOTE(review): presumably _query_tree turns r_max / dist into the
    # search angle -- confirm against the PairMaker implementation.
    theta_max = np.radians(450 / 3600)
    dist = 10 / theta_max

    indexes = maker._query_tree(vects[0], tree, dist)
    self.assertEqual(len(indexes), 4)
    self.assertEqual([0, 1, 2, 3], indexes)
def test_run(self):
    """Smoke test: ``run`` completes and one output row has known values.

    A row is picked with ``np.random.randint`` and each column listed in
    ``self.expected_columns`` is compared with a hard-coded value.
    """
    maker = pair_maker.PairMaker(self.r_mins,
                                 self.r_maxes,
                                 self.z_min,
                                 self.z_max)
    output = maker.run(self.catalog, self.catalog)

    # NOTE(review): the hard-coded values below only match one specific
    # row, so this presumably relies on the RNG being seeded in the
    # fixture -- confirm.
    row = output.iloc[np.random.randint(self.n_objects)]
    expected_values = [
        708,
        0.6202522969616155,
        4,
        6.52884524482144,
        531,
        133.259605,
    ]

    for col, val in zip(self.expected_columns, expected_values):
        # The id must match exactly; everything else is compared
        # approximately.
        if col == "ref_id":
            check = self.assertEqual
        else:
            check = self.assertAlmostEqual
        check(row[col], val)
def setUp(self):
    """Create the random catalog, its pair counts and a toy pair table.

    Sets ``n_objects``, ``z_min``, ``z_max``, ``pair_counts`` (PairMaker
    output for the random catalog at the 1-10 Mpc scale), ``pairs`` (six
    hand-written rows at redshifts 0.2 through 1.2) and ``ref_weights``.
    """
    # Fixed seed so the random catalog is reproducible between runs.
    np.random.seed(1234)

    self.n_objects = 1000
    self.z_min = 0.1
    self.z_max = 1.1

    # Random catalog within one degree of the pole: drawing uniformly in
    # the cosine of the polar angle between cos(1 deg) and cos(0).
    cos_polar = np.random.uniform(np.cos(np.radians(1.0)),
                                  np.cos(0),
                                  size=self.n_objects)
    decs = np.degrees(np.pi / 2 - np.arccos(cos_polar))
    ras = np.random.uniform(0, 360, size=self.n_objects)
    # Log-normal redshifts, roughly the shape of a magnitude-limited
    # sample.
    redshifts = np.random.lognormal(mean=-1,
                                    sigma=0.5,
                                    size=self.n_objects)
    catalog = {"id": np.arange(self.n_objects),
               "ra": ras,
               "dec": decs,
               "redshift": redshifts}

    maker = pair_maker.PairMaker([1], [10], self.z_min, self.z_max)
    self.pair_counts = maker.run(catalog, catalog)

    # Six identical rows that differ only in redshift.
    self.pairs = pd.DataFrame([
        {"redshift": redshift,
         "tot_sample": 10,
         "ave_unkn_weight": 1.0,
         "Mpc1.00t10.00_count": 5,
         "Mpc1.00t10.00_weight": 2.5}
        for redshift in (0.2, 0.4, 0.6, 0.8, 1.0, 1.2)])
    self.ref_weights = np.array([1.0, 0.5] * 3)