def sample_filter(ref_col_name, frac, random_state=None):
    """
    Returns a GCRQuery object to be used in the `filters` argument of
    get_quantities() to randomly sample the object catalog by a given
    fraction (*frac*).

    *ref_col_name* must be a column of integer values.
    Optionally, provide *random_state* (int or np.random.RandomState)
    to fix random state.
    """
    # pylint: disable=no-member
    frac = float(frac)
    if frac > 1 or frac < 0:
        raise ValueError("`frac` must be a float number in [0, 1].")

    if frac == 1:
        # No-op filter: keep everything.
        return GCRQuery()

    if frac == 0:
        # Reject everything.  NOTE: the deprecated `np.bool` alias was
        # removed in NumPy 1.24; use the builtin `bool` instead.
        return GCRQuery((lambda a: np.zeros_like(a, dtype=bool), ref_col_name))

    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)
    seed = random_state.randint(2**32)

    def _sampler(arr, frac=frac, seed=seed):
        # arr is a numpy array of integers.  Seed a fresh RandomState from
        # the first element of each chunk so the selection is deterministic
        # for a given catalog regardless of how it is chunked.
        size = len(arr)
        if size:
            return np.random.RandomState((int(arr[0]) + seed) % (2**32)).rand(size) < frac
        return np.zeros(0, dtype=bool)

    return GCRQuery((_sampler, ref_col_name))
def partition_filter(partition_name, ids, id_high=None):
    """
    Returns a GCRQuery object to be used in the `native_filters` argument of
    get_quantities(), to select a subset of partitions.

    *partition_name* must be a "native filter quantity" in GCR, and the
    partitions must be specified by integer IDs.  Existing examples include
    "tract" for object catalogs and "healpix_pixel" for cosmoDC2.

    If *ids* is a single integer, select only that partition.
    If *ids* and *id_high* are both given as single integers, select
    [ids, id_high] (inclusive on both ends!).
    If *ids* is a list, select partitions in that list (*id_high* is ignored).
    """
    if isinstance(ids, int):
        if id_high is None:
            return GCRQuery(f"{partition_name} == {ids}")
        elif isinstance(id_high, int):
            return GCRQuery(f"{partition_name} >= {ids}", f"{partition_name} <= {id_high}")
        raise ValueError(
            f"When `{partition_name}s` is an integer, `{partition_name}_high` must be an integer or None."
        )

    # NOTE: the deprecated `np.int` alias was removed in NumPy 1.24; the
    # builtin `int` is the documented replacement.
    ids = np.unique(np.asarray(ids, dtype=int))
    if not ids.size:
        raise ValueError(f"Must select at least one {partition_name}.")

    def _partition_selector(partition_ids, ids_to_select=ids):
        # `ids` has been uniqued above, so assume_unique is safe.
        return np.isin(partition_ids, ids_to_select, assume_unique=True)

    return GCRQuery((_partition_selector, partition_name))
def create_test_sample(self, catalog_data, test_sample):
    """
    Select a subset of the catalog data an input test sample.

    This function should be overloaded in inherited classes for more
    complex cuts (e.g. color cuts).

    Parameters
    ----------
    catalog_data : a GenericCatalogReader catalog instance
    test_sample : dictionary of dictionaries
        A dictionary specifying the columns to cut on and the min/max
        values of the cut.  Every entry must provide both 'min' and 'max'.
        Example:
            {mag: {min: -23, max: -22}
             z: {min: 0.1031, max: 0.2452}}

    Returns
    -------
    A GenericCatalogReader catalog instance cut to the requested bounds.
    """
    filters = []
    # Iterate items() directly rather than indexing test_sample[key] twice.
    for key, bounds in test_sample.items():
        filters.extend((
            '{} < {}'.format(key, bounds['max']),
            '{} >= {}'.format(key, bounds['min']),
        ))
    return GCRQuery(*filters).filter(catalog_data)
def create_test_sample(catalog_data, test_sample):
    """
    Select a subset of the catalog data an input test sample.

    This function should be overloaded in inherited classes for more
    complex cuts (e.g. color cuts).

    Parameters
    ----------
    catalog_data : a GenericCatalogReader catalog instance
    test_sample : dictionary of dictionaries
        A dictionary specifying the columns to cut on and the min/max
        values of the cut.  An entry that is not a dict is treated as a
        ready-made (customized) filter and passed through untouched.
        Example:
            {mag: {min: -23, max: -22}
             z: {min: 0.1031, max: 0.2452}}

    Returns
    -------
    A GenericCatalogReader catalog instance cut to the requested bounds.
    """
    cuts = []
    for quantity, spec in test_sample.items():
        if not isinstance(spec, dict):
            # customized filter supplied directly by the caller
            cuts.append(spec)
            continue
        # Either bound may be absent; emit only the cuts that are present.
        if 'max' in spec:
            cuts.append('{} < {}'.format(quantity, spec['max']))
        if 'min' in spec:
            cuts.append('{} >= {}'.format(quantity, spec['min']))
    return GCRQuery(*cuts).filter(catalog_data)
def _init_data_indices(self):
    """
    Do the spatial filtering of extragalactic catalog data.

    Side effects (attributes set on self):
    - self._native_filters : list of GCRQuery healpixel filters, or None
    - self._data_indices   : indices of rows inside the field of view
                             (indices into the native-filtered query result)
    - self._chunk_size     : defaulted to the number of selected rows if unset
    """
    self._native_filters = None
    descqa_catalog = self._descqa_obj._catalog
    # No pointing metadata or no field-of-view radius: keep every row.
    if self._obs_metadata is None or self._obs_metadata._boundLength is None:
        self._data_indices = np.arange(
            self._descqa_obj._catalog['raJ2000'].size)
    else:
        # _boundLength may be a scalar or a 2-element pair; use the larger
        # half-dimension as the search radius.  The trig below treats
        # pointingRA/Dec as radians -- presumably radius is too; TODO confirm.
        try:
            radius_rad = max(self._obs_metadata._boundLength[0],
                             self._obs_metadata._boundLength[1])
        except (TypeError, IndexError):
            radius_rad = self._obs_metadata._boundLength

        if 'healpix_pixel' in descqa_catalog._native_filter_quantities:
            # Unit vector of the pointing direction, used to query the
            # nside=8, ring-ordered healpixels overlapping the search disc.
            ra_rad = self._obs_metadata._pointingRA
            dec_rad = self._obs_metadata._pointingDec
            vv = np.array([
                np.cos(dec_rad) * np.cos(ra_rad),
                np.cos(dec_rad) * np.sin(ra_rad),
                np.sin(dec_rad)
            ])
            healpix_list = healpy.query_disc(8, vv, radius_rad,
                                             inclusive=True, nest=False)

            # OR together one GCRQuery per overlapping healpixel so the
            # catalog only loads data from relevant pixels.
            healpix_filter = None
            for hh in healpix_list:
                local_filter = GCRQuery('healpix_pixel==%d' % hh)
                if healpix_filter is None:
                    healpix_filter = local_filter
                else:
                    healpix_filter |= local_filter

            if healpix_filter is not None:
                if self._native_filters is None:
                    self._native_filters = [healpix_filter]
                else:
                    self._native_filters.append(healpix_filter)

        # Fetch sky positions (pre-filtered by healpixel where possible) and
        # keep only rows within the angular radius of the pointing center.
        ra_dec = descqa_catalog.get_quantities(
            ['raJ2000', 'decJ2000'],
            native_filters=self._native_filters)
        ra = ra_dec['raJ2000']
        dec = ra_dec['decJ2000']
        self._data_indices = np.where(
            _angularSeparation(ra, dec,
                               self._obs_metadata._pointingRA,
                               self._obs_metadata._pointingDec) < radius_rad)[0]

    # Default the iteration chunk size to "everything at once".
    if self._chunk_size is None:
        self._chunk_size = self._data_indices.size
def get_catalog_data(gc, quantities, filters=None):
    """
    Fetch *quantities* from catalog *gc*, keeping only rows where every
    requested column is finite.

    Parameters
    ----------
    gc : catalog reader instance (provides has_quantities / get_quantities)
    quantities : list of quantity names to fetch
    filters : optional filters forwarded to get_quantities

    Returns
    -------
    dict of column arrays, or a skipped TestResult when any quantity
    is missing from the catalog.
    """
    if not gc.has_quantities(quantities):
        return TestResult(skipped=True, summary='Missing requested quantities')
    # (Removed a dead `data = {}` initializer: get_quantities returns the dict.)
    data = gc.get_quantities(quantities, filters=filters)
    # make sure data entries are all finite
    data = GCRQuery(*((np.isfinite, col) for col in data)).filter(data)
    return data
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    """
    Accumulate red-sequence histograms of central and satellite galaxies in
    (redshift, color, halo mass) bins, optionally cross-matched against a
    redMaPPer member catalog, then plot the summary statistics.
    """
    prepared = self.prepare_galaxy_catalog(catalog_instance)
    if prepared is None:
        return TestResult(skipped=True)

    if self.use_redmapper:
        try:
            redmapper = GCRCatalogs.load_catalog(catalog_name + '_redmapper')
        except Exception:  # was a bare `except:`; don't swallow SystemExit/KeyboardInterrupt
            return TestResult(skipped=True)
        redmapper = redmapper.get_quantities(['galaxy_id'])

    absolute_magnitude_field, mag_fields, quantities_needed = prepared
    bins = (self.z_bins, self.c_bins, self.mass_bins)
    # One (z, color, mass) histogram per adjacent-band color.
    hist_cen = np.zeros((self.n_z_bins, self.n_c_bins, self.n_mass_bins, self.n_bands - 1))
    hist_sat = np.zeros_like(hist_cen)
    hist_mem_cen = np.zeros_like(hist_cen)
    hist_mem_sat = np.zeros_like(hist_cen)

    print(absolute_magnitude_field)
    # Bright (M < -19) centrals and satellites.
    cen_query = GCRQuery('is_central & ({} < -19)'.format(absolute_magnitude_field))
    sat_query = GCRQuery('(~is_central) & ({} < -19)'.format(absolute_magnitude_field))
    if 'r_host' in quantities_needed and 'r_vir' in quantities_needed:
        # Restrict satellites to those inside their host's virial radius.
        sat_query &= GCRQuery('r_host < r_vir')

    for data in catalog_instance.get_quantities(quantities_needed, return_iterator=True):
        cen_mask = cen_query.mask(data)
        sat_mask = sat_query.mask(data)
        if self.use_redmapper:
            # np.isin is the modern replacement for the deprecated np.in1d.
            mem_mask = np.isin(data['galaxy_id'], redmapper['galaxy_id'])
        for i in range(self.n_bands - 1):
            color = data[mag_fields[i]] - data[mag_fields[i + 1]]
            hdata = np.stack([data['redshift_true'], color, data['halo_mass']]).T
            hist_cen[:, :, :, i] += np.histogramdd(hdata[cen_mask], bins)[0]
            hist_sat[:, :, :, i] += np.histogramdd(hdata[sat_mask], bins)[0]
            if self.use_redmapper:
                hist_mem_cen[:, :, :, i] += np.histogramdd(hdata[mem_mask & cen_mask], bins)[0]
                hist_mem_sat[:, :, :, i] += np.histogramdd(hdata[mem_mask & sat_mask], bins)[0]
        data = cen_mask = sat_mask = mem_mask = None  # free chunk memory early

    rs_mean, rs_scat, red_frac_sat, red_frac_cen = self.compute_summary_statistics(
        hist_sat, hist_cen, hist_mem_sat, hist_mem_cen)
    red_seq = {'rs_mean': rs_mean, 'rs_scat': rs_scat,
               'red_frac_sat': red_frac_sat, 'red_frac_cen': red_frac_cen}
    self.make_plot(red_seq, hist_cen, hist_sat, hist_mem_cen, hist_mem_sat,
                   catalog_name, os.path.join(output_dir, 'red_sequence.png'))
    return TestResult(inspect_only=True)
def get_smass(catalog_instance):
    """
    Parameters
    ----------
    catalog_instance = Catalogue to use

    Returns
    -------
    - log10 of stellar mass with CMASS color and magnitude cuts applied
    - number density of galaxies (galaxies per square degree)
    """
    gc = catalog_instance
    sky_area = float(gc.sky_area)

    # Resolve the catalog-specific names of the quantities we need.
    smass = gc.first_available("stellar_mass")
    gmag = gc.first_available("mag_true_g_lsst")
    rmag = gc.first_available("mag_true_r_lsst")
    imag = gc.first_available("mag_true_i_lsst")
    if not all((smass, gmag, rmag, imag)):
        raise KeyError("Not all needed quantities exist!!")

    # Stellar mass must be positive for the log10 below to be finite.
    valid_smass = GCRQuery(f"{smass} > 0")

    # CMASS target-selection color and magnitude cuts.
    cmass_cuts = GCRQuery(
        f"({rmag} - {imag}) - ({gmag} - {rmag}) / 8 > 0.55",
        f"{imag} < 19.86 + 1.6 * (({rmag} - {imag}) - ({gmag} - {rmag}) / 8 - 0.8)",
        f"{imag} < 19.9",
        f"{imag} > 17.5",
        f"{rmag} - {imag} < 2",
    )

    selected = gc.get_quantities([smass], filters=[valid_smass, cmass_cuts])[smass]
    log_smass_cmass = np.log10(selected)

    print()
    print("minimum cmass-cut = ", np.min(log_smass_cmass))
    print("maximum cmass-cut = ", np.max(log_smass_cmass))
    print()

    numDen = len(log_smass_cmass) / sky_area
    return log_smass_cmass, numDen
def create_test_sample(catalog_data, test_sample, h=1):
    """
    Select a subset of the catalog data an input test sample.

    This function should be overloaded in inherited classes for more
    complex cuts (e.g. color cuts).

    Parameters
    ----------
    catalog_data : a GenericCatalogReader catalog instance
    test_sample : dictionary of dictionaries
        A dictionary specifying the columns to cut on and the min/max
        values of the cut.  Non-dict entries are treated as customized
        filter strings; the token 'Mag_shift' inside them is replaced by
        the numeric h-correction.
        Example:
            {Mag: {min: -23, max: -22}
             z: {min: 0.1031, max: 0.2452}}
    h : Hubble parameter scaling applied to magnitude ('Mag') cuts

    Returns
    -------
    A GenericCatalogReader catalog instance cut to the requested bounds.
    """
    # Magnitude shift to adjust for h=1 units in data (eg Zehavi et. al.)
    mag_offset = 5 * np.log10(h)

    cuts = []
    for quantity, spec in test_sample.items():
        if isinstance(spec, dict):
            shift_this = 'Mag' in quantity
            if 'max' in spec:
                upper = spec['max'] + mag_offset if shift_this else spec['max']
                cuts.append('{} < {}'.format(quantity, upper))
            if 'min' in spec:
                lower = spec['min'] + mag_offset if shift_this else spec['min']
                cuts.append('{} >= {}'.format(quantity, lower))
        else:
            # customized filter: substitute the magnitude-shift token if present
            if 'Mag_shift' in spec:
                spec = re.sub('Mag_shift', '{:0.2f}'.format(mag_offset), spec)
                print('Substituted filter to adjust for Mag shifts: {}'.format(spec))
            cuts.append(spec)

    print('Test sample filters for {}'.format(test_sample), cuts)
    return GCRQuery(*cuts).filter(catalog_data)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    """
    Load positions and per-band magnitudes for objects passing the
    selection cuts, and stash them on self for later cross-catalog plots.
    """
    # Map "first available" magnitude column -> canonical 'mag_<band>' name.
    mags = {
        catalog_instance.first_available('mag_{}_cModel'.format(b), 'mag_true_{}'.format(b)): 'mag_{}'.format(b)
        for b in self.bands
    }
    qs = ['ra', 'dec']
    qs = qs + list(mags)  # Trick to read both true and observed magnitudes by @Yao
    filters = [GCRQuery(self.selection_cuts)]
    data = catalog_instance.get_quantities(qs, filters=filters)
    # Rename magnitude columns to their canonical names.
    data = {mags.get(k, k): v for k, v in data.items()}
    # BUG FIX: `data` is a dict of column arrays, so len(data) counted the
    # number of columns, not objects.  Count rows of one column instead.
    print('Selected %d objects for catalog %s' % (len(data['ra']), catalog_name))
    self.ra[catalog_name] = data['ra']
    self.dec[catalog_name] = data['dec']
    for band in self.bands:
        self.magnitude[(catalog_name, band)] = data['mag_%s' % band]
    return TestResult(inspect_only=True)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    """
    Accumulate conditional luminosity functions (CLF) of red central and
    satellite galaxies in (magnitude, halo mass, redshift) bins, normalize
    by halo counts, and plot the result.
    """
    prepared = self.prepare_galaxy_catalog(catalog_instance)
    if prepared is None:
        return TestResult(skipped=True)

    absolute_magnitude1_field, absolute_magnitude2_field, quantities_needed = prepared
    colnames = [absolute_magnitude2_field, 'halo_mass', 'redshift_true']
    bins = (self.magnitude_bins, self.mass_bins, self.z_bins)
    hist_cen = np.zeros((self.n_magnitude_bins, self.n_mass_bins, self.n_z_bins))
    hist_sat = np.zeros_like(hist_cen)

    # Red-galaxy selection via the user-supplied color-cut callable.
    red_query = GCRQuery((self.color_cut, absolute_magnitude1_field,
                          absolute_magnitude2_field, 'redshift_true'))
    cen_query = GCRQuery('is_central') & red_query
    sat_query = ~GCRQuery('is_central') & red_query
    if 'r_host' in quantities_needed and 'r_vir' in quantities_needed:
        # Only count satellites inside their host's virial radius.
        sat_query &= GCRQuery('r_host < r_vir')

    for data in catalog_instance.get_quantities(quantities_needed, return_iterator=True):
        cen_mask = cen_query.mask(data)
        sat_mask = sat_query.mask(data)
        # BUG FIX: np.stack requires a sequence of arrays; passing a
        # generator raises TypeError on modern NumPy.
        data = np.stack([data[k] for k in colnames]).T
        hist_cen += np.histogramdd(data[cen_mask], bins)[0]
        hist_sat += np.histogramdd(data[sat_mask], bins)[0]
        data = cen_mask = sat_mask = None  # free chunk memory early

    # Normalize by the number of halos (centrals) per (mass, z) bin.
    halo_counts = hist_cen.sum(axis=0)
    clf = dict()
    clf['sat'] = hist_sat / halo_counts
    clf['cen'] = hist_cen / halo_counts
    clf['tot'] = clf['sat'] + clf['cen']
    self.make_plot(clf, catalog_name, os.path.join(output_dir, 'clf.png'))
    return TestResult(passed=True, score=0)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    """
    Compare mock-catalog color distributions against an observed validation
    catalog using the Cramer-von Mises statistic, make plots, and append a
    text summary to self.summary_output_file in output_dir.
    """
    # All bands appearing in the requested colors (e.g. 'g-r' -> {'g', 'r'}).
    bands = set(sum((c.split('-') for c in self.colors), []))
    possible_names = ('mag_{}_sdss', 'mag_{}_des', 'mag_true_{}_sdss', 'mag_true_{}_des')
    # band -> first magnitude column available in the catalog (or None).
    labels = {
        band: catalog_instance.first_available(*(n.format(band) for n in possible_names))
        for band in bands
    }
    labels = {k: v for k, v in labels.items() if v}
    if len(labels) < 2:
        return TestResult(
            skipped=True,
            summary=
            'magnitudes in mock catalog do not have at least two needed bands.'
        )
    # All chosen columns must come from the same photometric system
    # (suffix after the final underscore, 'sdss' or 'des').
    filters = set((v.rpartition('_')[-1] for v in labels.values()))
    if len(filters) > 1:
        return TestResult(
            skipped=True,
            summary='magnitudes in mock catalog have mixed filters.')
    filter_this = filters.pop()

    labels['redshift'] = 'redshift_true'
    if not catalog_instance.has_quantity(labels['redshift']):
        return TestResult(skipped=True,
                          summary='mock catalog does not have redshift.')

    # Load mock catalog data restricted to the test's redshift window.
    filters = [
        '{} > {}'.format(labels['redshift'], self.zlo),
        '{} < {}'.format(labels['redshift'], self.zhi)
    ]
    data = catalog_instance.get_quantities(list(labels.values()), filters)
    # Re-key from catalog column names back to plain band names.
    data = {k: data[v] for k, v in labels.items()}

    # Color transformation into the validation catalog's photometric system.
    if self.color_transformation_q:
        color_trans = None
        if self.validation_catalog == 'DEEP2':
            color_trans = color_transformation['{}2cfht'.format(filter_this)]
        elif self.validation_catalog == 'SDSS' and filter_this == 'des':
            color_trans = color_transformation['des2sdss']
        if color_trans:
            data_transformed = {}
            for band in bands:
                try:
                    # numexpr evaluates the transformation expression with the
                    # band arrays as its only namespace.
                    data_transformed[band] = ne.evaluate(color_trans[band],
                                                         local_dict=data,
                                                         global_dict={})
                except KeyError:
                    # Transformation not defined for this band; skip it.
                    continue
            data_transformed['redshift'] = data['redshift']
            data = data_transformed
            del data_transformed

    # Apply the observational r-band magnitude limit.
    data = GCRQuery('r < {}'.format(self.obs_r_mag_limit)).filter(data)

    # Compute color distribution (PDF, CDF etc.)
    mock_color_dist = self.get_color_dist(data)

    # Calculate Cramer-von Mises statistic for each color present in both
    # the observed and mock distributions, with and without shifting the
    # mock by the difference of medians.
    color_shift = {}
    cvm_omega = {}
    cvm_omega_shift = {}
    for color in self.colors:
        if not ((color in self.obs_color_dist) and (color in mock_color_dist)):
            continue
        color_shift[color] = self.obs_color_dist[color][
            'median'] - mock_color_dist[color]['median']
        cvm_omega[color] = CvM_statistic(
            mock_color_dist[color]['nsample'],
            self.obs_color_dist[color]['nsample'],
            mock_color_dist[color]['binctr'], mock_color_dist[color]['cdf'],
            self.obs_color_dist[color]['binctr'],
            self.obs_color_dist[color]['cdf'])
        cvm_omega_shift[color] = CvM_statistic(
            mock_color_dist[color]['nsample'],
            self.obs_color_dist[color]['nsample'],
            mock_color_dist[color]['binctr'] + color_shift[color],
            mock_color_dist[color]['cdf'],
            self.obs_color_dist[color]['binctr'],
            self.obs_color_dist[color]['cdf'])

    self.make_plots(mock_color_dist, color_shift, cvm_omega, cvm_omega_shift,
                    catalog_name, output_dir)

    # Write to summary file (append mode: one section per catalog/run).
    fn = os.path.join(output_dir, self.summary_output_file)
    with open(fn, 'a') as f:
        f.write('%2.3f < z < %2.3f\n' % (self.zlo, self.zhi))
        f.write('r_mag < %2.3f\n\n' % (self.obs_r_mag_limit))
        for color in self.colors:
            if not ((color in self.obs_color_dist) and
                    (color in mock_color_dist)):
                continue
            f.write("Median " + color +
                    " difference (obs - mock) = %2.3f\n" %
                    (color_shift[color]))
            f.write(color + ": {} = {:2.6f}\n".format(
                'CvM statistic', cvm_omega[color]))
            f.write(color + " (shifted): {} = {:2.6f}\n".format(
                'CvM statistic', cvm_omega_shift[color]))
            f.write("\n")

    return TestResult(inspect_only=True)
# Solution for Challenge 2 of DC2 Coadd Run1.1p GCR tutorial Part III: Guided Challenges import numpy as np import matplotlib.pyplot as plt import GCRCatalogs from GCR import GCRQuery catalog = GCRCatalogs.load_catalog('dc2_coadd_run1.1p') filters=[ GCRQuery('extendedness == 0'), GCRQuery('clean'), GCRQuery('blendedness < 10**(-0.375)'), ~GCRQuery('I_flag'), GCRQuery('i_SNR > 21') ] g1_modif = lambda ixx,iyy,ixy: (ixx-iyy)/(ixx+iyy) g2_modif = lambda ixx,iyy,ixy: 2.*ixy/(ixx+iyy) sigma_modif = lambda ixx,iyy,ixy: (ixx*iyy - ixy**2)**0.25 catalog.add_derived_quantity('g1', g1_modif, 'Ixx', 'Iyy', 'Ixy') catalog.add_derived_quantity('g2', g2_modif, 'Ixx', 'Iyy', 'Ixy') catalog.add_derived_quantity('sigma', sigma_modif, 'Ixx', 'Iyy', 'Ixy') catalog.add_derived_quantity('psf_g1', g1_modif, 'IxxPSF', 'IyyPSF', 'IxyPSF') catalog.add_derived_quantity('psf_g2', g2_modif, 'IxxPSF', 'IyyPSF', 'IxyPSF') catalog.add_derived_quantity('psf_sigma', sigma_modif, 'IxxPSF', 'IyyPSF', 'IxyPSF') quantities = ['ra', 'dec', 'mag_i', 'i_SNR', 'psf_fwhm_i',
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    """
    Accumulate ellipticity histograms per morphology (with magnitude and
    ancillary cuts), plot them against validation data, and save both the
    figure and text summaries to output_dir.
    """
    #update color and marker to preserve catalog colors and markers across tests
    catalog_color = next(self._color_iterator)

    #add quantities to catalog if needed
    for band in self.native_luminosities:
        if catalog_instance.has_quantity(self.native_luminosities[band]):
            catalog_instance.add_quantity_modifier(
                'Mag_true_{}_z0'.format(band),
                (lambda x: -2.5 * np.log10(x), self.native_luminosities[band]))

    print('Checking for required quantities')
    #check catalog data for required quantities
    required_quantities = []
    for pgroup in self.possible_quantities:
        found_quantity = catalog_instance.first_available(*pgroup)
        if found_quantity is not None:
            required_quantities.append(found_quantity)

    if not catalog_instance.has_quantities(required_quantities + self.filter_quantities):
        return TestResult(
            skipped=True,
            summary='Missing some required quantities: {}'.format(
                ', '.join(required_quantities)))
    if self.ancillary_quantities is not None and not catalog_instance.has_quantities(
            self.ancillary_quantities):
        return TestResult(
            skipped=True,
            summary='Missing some ancillary quantities: {}'.format(
                ', '.join(self.ancillary_quantities)))

    mag_field = catalog_instance.first_available(*self.possible_mag_fields)
    if not mag_field:
        return TestResult(
            skipped=True,
            summary='Missing needed quantities to make magnitude cuts')
    Mag_field = catalog_instance.first_available(*self.possible_Mag_fields)
    if not Mag_field:
        return TestResult(
            skipped=True,
            summary='Missing needed quantities to make magnitude cuts')

    all_quantities = required_quantities + [mag_field, Mag_field] + self.filter_quantities
    if self.ancillary_quantities is not None:
        all_quantities = all_quantities + self.ancillary_quantities
    print('Fetching quantities', all_quantities)

    mag_filtername = str(mag_field.split('_')[-2])
    Mag_filtername = str(Mag_field.split('_')[2])
    filelabel = '_'.join(('m', mag_filtername, 'M', Mag_filtername))

    #setup plots
    fig, ax = plt.subplots(self.nrows, self.ncolumns, sharex='col')
    fig.text(self.yaxis_xoffset, self.yaxis_yoffset, self.yaxis,
             va='center', rotation='vertical')  #setup a common axis label

    #initialize arrays for storing histogram sums
    # NOTE: the deprecated `np.int` alias was removed in NumPy 1.24; the
    # builtin `int` is the documented replacement.
    N_array = np.zeros((self.nrows, self.ncolumns, len(self.ebins) - 1), dtype=int)
    sume_array = np.zeros((self.nrows, self.ncolumns, len(self.ebins) - 1))
    sume2_array = np.zeros((self.nrows, self.ncolumns, len(self.ebins) - 1))

    #get catalog data by looping over data iterator (needed for large catalogs) and aggregate histograms
    for catalog_data in catalog_instance.get_quantities(
            all_quantities, filters=self.filters, return_iterator=True):
        catalog_data = GCRQuery(
            *((np.isfinite, col) for col in catalog_data)).filter(catalog_data)
        for morphology, N, sume, sume2 in zip_longest(
                self.morphology,
                N_array.reshape(-1, N_array.shape[-1]),  #flatten all but last dimension of array
                sume_array.reshape(-1, sume_array.shape[-1]),
                sume2_array.reshape(-1, sume2_array.shape[-1]),
        ):
            #make cuts
            if morphology is not None:
                mask = (catalog_data[mag_field] < self.mag_lo.get(morphology))
                mask &= (
                    self.Mag_hi.get(morphology) < catalog_data[Mag_field]
                ) & (catalog_data[Mag_field] < self.Mag_lo.get(morphology))
                if self.ancillary_quantities is not None:
                    for aq, key in zip_longest(
                            self.ancillary_quantities,
                            self.validation_data['cuts'].get('ancillary_keys')):
                        mask &= (self.validation_data['cuts'][morphology].get(key+'_min') < catalog_data[aq]) &\
                                (catalog_data[aq] < self.validation_data['cuts'][morphology].get(key+'_max'))
                print(
                    'Number of {} galaxies passing selection cuts for morphology {} = {}'
                    .format(catalog_name, morphology, np.sum(mask)))

                #compute ellipticity from definition
                e_this = self.ellipticity_function(
                    *[catalog_data[q][mask] for q in required_quantities])
                del mask

                #accumulate histograms (in-place so the *_array views update)
                N += np.histogram(e_this, bins=self.ebins)[0]
                sume += np.histogram(e_this, bins=self.ebins, weights=e_this)[0]
                sume2 += np.histogram(e_this, bins=self.ebins, weights=e_this**2)[0]

    #check that catalog has entries for quantity to be plotted
    if not np.asarray([N.sum() for N in N_array]).sum():
        raise ValueError('No data found for quantities {}'.format(
            ', '.join(required_quantities)))

    #make plots
    results = {}
    for n, (ax_this, summary_ax_this, morphology, N, sume, sume2) in enumerate(
            zip_longest(
                ax.flat,
                self.summary_ax.flat,
                self.morphology,
                N_array.reshape(-1, N_array.shape[-1]),  #flatten all but last dimension of array
                sume_array.reshape(-1, sume_array.shape[-1]),
                sume2_array.reshape(-1, sume2_array.shape[-1]),
            )):
        if morphology is not None:
            #get labels
            cutlabel = '${} < {} < {}$; ${} < {}$; {}'.format(str(self.Mag_hi.get(morphology)), Mag_filtername, str(self.Mag_lo.get(morphology)),\
                       mag_filtername, str(self.mag_lo.get(morphology)), morphology)
            ancillary_label = []
            if self.ancillary_quantities is not None:
                for key in self.validation_data['cuts'].get('ancillary_keys'):
                    ancillary_label.append('${} <$ {} $< {}$'.format(str(self.validation_data['cuts'][morphology].get(key+'_min')),\
                                           key, str(self.validation_data['cuts'][morphology].get(key+'_max'))))
            ancillary_label = '; '.join(ancillary_label)
            catalog_label = '; '.join((catalog_name, ancillary_label))
            validation_label = ' '.join(
                (self.validation_data.get('label', ''), morphology))
            reskey = cutlabel.replace('$', '')

            #get points to be plotted
            e_values = sume / N
            sumN = N.sum()
            total = '(# of galaxies = {})'.format(sumN)
            Nerrors = np.sqrt(N)
            if self.normed:
                binwidths = self.ebins[1:] - self.ebins[:-1]
                N = N / sumN / binwidths
                Nerrors = Nerrors / sumN / binwidths
            results[reskey] = {'catalog': {'e_ave': e_values, 'N': N, 'N+': N + Nerrors, 'N-': N - Nerrors,
                                           'total': total, 'xtralabel': ancillary_label.replace('$', '')}}

            self.catalog_subplot(ax_this, e_values, N, catalog_color, catalog_label)
            results[reskey]['validation'] = self.validation_subplot(
                ax_this, self.validation_data.get(morphology), validation_label)
            self.decorate_subplot(ax_this, n, label=cutlabel)

            #add curve for this catalog to summary plot
            self.catalog_subplot(summary_ax_this, e_values, N, catalog_color,
                                 catalog_label, errors=Nerrors)
            if self.first_pass:  #add validation data if evaluating first catalog
                self.validation_subplot(summary_ax_this,
                                        self.validation_data.get(morphology),
                                        validation_label)
            self.decorate_subplot(summary_ax_this, n, label=cutlabel)
        else:
            #make empty subplots invisible
            ax_this.set_visible(False)
            summary_ax_this.set_visible(False)

    #save results for catalog and validation data in txt files
    for filename, dkey, dtype, info in zip_longest(
            (catalog_name, self.observation), ('catalog', 'validation'),
            ('N', 'data'), ('total', )):
        if filename:
            with open(
                    os.path.join(
                        output_dir,
                        ''.join(['Nvs', self.file_label, '_', filelabel + '.txt'])),
                    'ab') as f_handle:  #open file in append mode
                #loop over cuts in results dict
                for key, value in results.items():
                    self.save_quantities(dtype, value[dkey], f_handle,
                                         comment=' '.join(
                                             (key,
                                              value[dkey].get('xtralabel', ''),
                                              value[dkey].get(info, ''))))

    if self.first_pass:  #turn off validation data plot in summary for remaining catalogs
        self.first_pass = False

    #make final adjustments to plots and save figure
    self.post_process_plot(fig)
    fig.savefig(
        os.path.join(
            output_dir,
            ''.join(['Nvs', self.file_label, '_', filelabel + '.png'])))
    plt.close(fig)
    return TestResult(inspect_only=True)
def do_fitting(cat, component, healpix, lim, n_threads):
    """
    Fit a set of components to SEDs, Av, Rv, magNorm using sed_from_galacticus_mags

    Parameters
    ----------
    cat -- the result of GCRCatalogs.load_catalog('catalog_name')
    component -- a string; either 'disk' or 'bulge'
    healpix -- an int indicating which healpixel to fit
    lim -- an int indicating how many objects to actually fit
    n_threads -- number of worker processes to fan the fit out over

    Returns
    -------
    numpy arrays of:
    redshift
    galaxy_id
    sed_name
    magNorm
    Av
    Rv
    (plus LSST fluxes as the final element of the returned tuple)
    """
    filter_data = sed_filter_names_from_catalog(cat)
    filter_names = filter_data[component]['filter_name']
    lsst_filter_names = filter_data[component]['lsst_fluxes']
    wav_min = filter_data[component]['wav_min']
    wav_width = filter_data[component]['wav_width']

    H0 = cat.cosmology.H0.value
    Om0 = cat.cosmology.Om0

    # Restrict the query to the requested healpixel.
    healpix_query = GCRQuery('healpix_pixel==%d' % healpix)
    qties = cat.get_quantities(
        list(filter_names) + list(lsst_filter_names) +
        ['redshift', 'redshift_true', 'galaxy_id'],
        native_filters=[healpix_query])

    # Fluxes can be zero/negative; silence the resulting log10 warnings and
    # let NaN/inf propagate into the magnitude arrays.
    with np.errstate(divide='ignore', invalid='ignore'):
        mag_array = np.array(
            [-2.5 * np.log10(qties[ff][:lim]) for ff in filter_names])
        lsst_mag_array = np.array(
            [-2.5 * np.log10(qties[ff][:lim]) for ff in lsst_filter_names])

    redshift = qties['redshift'][:lim]
    redshift_true = qties['redshift_true'][:lim]

    # NOTE(review): this 2-galaxy call's results are overwritten below before
    # use -- it appears to serve only as a warm-up of sed_from_galacticus_mags'
    # internal caches before forking workers; TODO confirm.
    (sed_names, mag_norms, av_arr,
     rv_arr) = sed_from_galacticus_mags(mag_array[:, :2], redshift[:2],
                                        redshift_true[:2], H0, Om0, wav_min,
                                        wav_width, lsst_mag_array[:, :2])

    # Fan the fit out over n_threads worker processes, each taking a
    # contiguous slice of d_gal galaxies; results come back keyed by the
    # slice start index in a manager dict.
    mgr = multiprocessing.Manager()
    out_dict = mgr.dict()
    d_gal = len(redshift) // n_threads
    p_list = []
    for i_start in range(0, len(redshift), d_gal):
        s = slice(i_start, i_start + d_gal)
        p = multiprocessing.Process(
            target=_parallel_fitting,
            args=(mag_array[:, s], redshift[s], redshift_true[s], H0, Om0,
                  wav_min, wav_width, lsst_mag_array[:, s], out_dict, i_start))
        p.start()
        p_list.append(p)
    for p in p_list:
        p.join()

    # Reassemble the per-slice worker outputs into full-length arrays
    # (overwriting the warm-up results above).
    sed_names = np.empty(len(redshift), dtype=(str, 200))
    mag_norms = np.zeros((6, len(redshift)), dtype=float)
    av_arr = np.zeros(len(redshift), dtype=float)
    rv_arr = np.zeros(len(redshift), dtype=float)
    lsst_fluxes = np.zeros((6, len(redshift)), dtype=float)
    t_start_slicing = time.time()
    for i_start in out_dict.keys():
        s = slice(i_start, i_start + d_gal)
        sed_names[s] = out_dict[i_start][0]
        mag_norms[:, s] = out_dict[i_start][1]
        av_arr[s] = out_dict[i_start][2]
        rv_arr[s] = out_dict[i_start][3]
        lsst_fluxes[:, s] = out_dict[i_start][4]

    return (redshift, qties['galaxy_id'][:lim], sed_names, mag_norms, av_arr,
            rv_arr, lsst_fluxes)
default=0, help='number of galaxies to randomly validate ' '(defaults to zero)') args = parser.parse_args() assert args.healpix is not None assert args.out_dir is not None assert args.out_name is not None if not os.path.isdir(args.out_dir): os.makedirs(args.out_dir) sed_dir = getPackageDir('sims_sed_library') print('loading %s' % args.catalog) cat = GCRCatalogs.load_catalog(args.catalog) h_query = GCRQuery('healpix_pixel==%d' % args.healpix) if args.lim is None or args.lim < 0: gid = cat.get_quantities('galaxy_id', native_filters=[h_query])['galaxy_id'] args.lim = 2 * len(gid) out_file_name = os.path.join(args.out_dir, args.out_name) t_start = time.time() ########## actually fit SED, magNorm, and dust parameters to disks and bulges t0 = 1539899570.0 print('starting %d at %.2f' % (args.healpix, time.time() - t0)) (disk_redshift, disk_id, disk_sed_name, disk_magnorm, disk_av, disk_rv, disk_lsst_fluxes) = do_fitting(cat, 'disk', args.healpix, args.lim,
maskedmags = mags[mask][(snr[mask]>snrthreshold-1) & (snr[mask]<snrthreshold+1)] if len(maskedmags)>0: map_out[ind] = np.mean(maskedmags) map_var_out[ind] = np.std(maskedmags) dtype = [('out','float'), ('var_out','float')] rec_mp = np.rec.fromarrays([map_out, map_var_out], dtype=dtype) hsp_mp = hsp.HealSparseMap.makeEmpty(32, nsideSparse, dtype=dtype, primary='out') hsp_mp.updateValues(pix_uni, rec_mp) return hsp_mp catalog = GCRCatalogs.load_catalog('dc2_object_run1.2i_all_columns_with_photoz') band = 'i' simple_cuts = [ GCRQuery('clean'), GCRQuery('detect_isPrimary'), GCRQuery((np.isfinite, 'ra')), GCRQuery((np.isfinite, 'dec')), GCRQuery((np.isfinite, 'mag_%s_cModel'%band)), GCRQuery((np.isfinite, 'snr_%s_cModel'%band)) ] # Loads the data after cut data_cut = catalog.get_quantities(['ra', 'dec', 'snr_%s_cModel'%band, 'mag_%s_cModel'%band], filters = simple_cuts) ra,dec = data_cut['ra'], data_cut['dec'] mags = data_cut['mag_%s_cModel'%band] snr = data_cut['snr_%s_cModel'%band] hsp_mp = depth_map_meanSNRrange(ra, dec, mags, snr, 5, 2048) hsp_mp.write('depth_map.fits', clobber=True)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    '''
    Loop over magnitude cuts and make plots.

    For each redshift bin in self.z_bins, bins galaxy sizes (converted from
    arcsec to kpc via the catalog cosmology) in luminosity, writes the binned
    statistics to a text file, and overplots the validation data on a 2x3
    grid of subplots.  One figure is produced per catalog.

    Parameters: catalog_instance (GCR catalog reader), catalog_name (str
    label used in plot title), output_dir (str; receives txt files and the
    summary png).  Returns a TestResult (skipped if required quantities are
    missing, otherwise inspect_only).
    '''
    # load catalog data
    # spline mapping redshift -> comoving distance for this catalog's cosmology
    spl = redshift2dist(catalog_instance.cosmology)
    colnames = dict()
    colnames['z'] = catalog_instance.first_available(
        'redshift', 'redshift_true')
    colnames['mag'] = catalog_instance.first_available(
        *self.possible_mag_fields)
    # 'onecomp' uses a single size column; 'twocomp' uses bulge+disk sizes
    if self.observation == 'onecomp':
        colnames['size'] = catalog_instance.first_available(
            'size', 'size_true')
    elif self.observation == 'twocomp':
        colnames['size_bulge'] = catalog_instance.first_available(
            'size_bulge', 'size_bulge_true')
        colnames['size_disk'] = catalog_instance.first_available(
            'size_disk', 'size_disk_true')
    # first_available returns a falsy value when no candidate quantity exists
    if not all(v for v in colnames.values()):
        return TestResult(skipped=True, summary='Missing requested quantities')
    #Check whether the columns are finite or not
    filters = [(np.isfinite, c) for c in colnames.values()]
    #Select objects within maximum and minimum redshift of all the bins
    filters.extend((
        '{} < {}'.format(
            colnames['z'],
            max(z_bin['z_max'] for z_bin in self.z_bins)),
        '{} >= {}'.format(
            colnames['z'],
            min(z_bin['z_min'] for z_bin in self.z_bins)),
    ))
    catalog_data = catalog_instance.get_quantities(list(colnames.values()),
                                                   filters=filters)
    # rekey the returned dict by our short names ('z', 'mag', 'size', ...)
    catalog_data = {k: catalog_data[v] for k, v in colnames.items()}
    fig, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)
    try:
        col = 0
        row = 0
        for z_bin in self.z_bins:
            ax = axes[row, col]
            # filter catalog data for this bin
            filters = [
                'z < {}'.format(z_bin['z_max']),
                'z >= {}'.format(z_bin['z_min']),
            ]
            catalog_data_this = GCRQuery(*filters).filter(catalog_data)
            if len(catalog_data_this['z']) == 0:
                continue
            z_mean = (z_bin['z_max'] + z_bin['z_min']) / 2.
            output_filepath = os.path.join(
                output_dir,
                self.output_filename_template.format(
                    z_bin['z_min'], z_bin['z_max']))
            colors = ['r', 'b']
            # luminosity bin edges in log10(L); centers used for plotting
            default_L_bin_edges = np.array([9, 9.5, 10, 10.5, 11, 11.5])
            default_L_bins = (default_L_bin_edges[1:] +
                              default_L_bin_edges[:-1]) / 2.
            if self.observation == 'onecomp':
                logL_G = self.ConvertAbsMagLuminosity(
                    catalog_data_this['mag'], 'g')
                # angular size [arcsec] -> physical size [kpc]:
                # multiply by angular-diameter distance = comoving/(1+z)
                size_kpc = catalog_data_this[
                    'size'] * self._ARCSEC_TO_RADIAN * interpolate.splev(
                        catalog_data_this['z'], spl) / (
                            1 + catalog_data_this['z'])
                binned_size_kpc = binned_statistic(
                    logL_G, size_kpc, bins=default_L_bin_edges,
                    statistic='mean')[0]
                binned_size_kpc_err = binned_statistic(
                    logL_G, size_kpc, bins=default_L_bin_edges,
                    statistic='std')[0]
                np.savetxt(
                    output_filepath,
                    np.transpose((default_L_bins, binned_size_kpc,
                                  binned_size_kpc_err)))
                # validation rows within +/-0.25 in z of the bin center
                validation_this = self.validation_data[
                    (self.validation_data[:, 0] < z_mean + 0.25)
                    & (self.validation_data[:, 0] > z_mean - 0.25)]
                # validation columns: 1=logL, 2=log size, 3/4=bounds (log)
                ax.semilogy(validation_this[:, 1],
                            10**validation_this[:, 2],
                            label=self.label_template.format(
                                z_bin['z_min'], z_bin['z_max']))
                ax.fill_between(validation_this[:, 1],
                                10**validation_this[:, 3],
                                10**validation_this[:, 4],
                                lw=0, alpha=0.2)
                ax.errorbar(default_L_bins, binned_size_kpc,
                            binned_size_kpc_err, marker='o', ls='')
            elif self.observation == 'twocomp':
                logL_I = self.ConvertAbsMagLuminosity(
                    catalog_data_this['mag'], 'i')
                # common arcsec->kpc conversion factor for both components
                arcsec_to_kpc = self._ARCSEC_TO_RADIAN * interpolate.splev(
                    catalog_data_this['z'], spl) / (
                        1 + catalog_data_this['z'])
                binned_bulgesize_kpc = binned_statistic(
                    logL_I,
                    catalog_data_this['size_bulge'] * arcsec_to_kpc,
                    bins=default_L_bin_edges, statistic='mean')[0]
                binned_bulgesize_kpc_err = binned_statistic(
                    logL_I,
                    catalog_data_this['size_bulge'] * arcsec_to_kpc,
                    bins=default_L_bin_edges, statistic='std')[0]
                binned_disksize_kpc = binned_statistic(
                    logL_I,
                    catalog_data_this['size_disk'] * arcsec_to_kpc,
                    bins=default_L_bin_edges, statistic='mean')[0]
                binned_disksize_kpc_err = binned_statistic(
                    logL_I,
                    catalog_data_this['size_disk'] * arcsec_to_kpc,
                    bins=default_L_bin_edges, statistic='std')[0]
                # empty luminosity bins yield NaN means/stds; zero them so
                # savetxt/plotting do not propagate NaNs
                binned_bulgesize_kpc = np.nan_to_num(binned_bulgesize_kpc)
                binned_bulgesize_kpc_err = np.nan_to_num(
                    binned_bulgesize_kpc_err)
                binned_disksize_kpc = np.nan_to_num(binned_disksize_kpc)
                binned_disksize_kpc_err = np.nan_to_num(
                    binned_disksize_kpc_err)
                np.savetxt(
                    output_filepath,
                    np.transpose(
                        (default_L_bins, binned_bulgesize_kpc,
                         binned_bulgesize_kpc_err, binned_disksize_kpc,
                         binned_disksize_kpc_err)))
                validation_this = self.validation_data[
                    (self.validation_data[:, 0] < z_mean + 0.25)
                    & (self.validation_data[:, 0] > z_mean - 0.25)]
                ax.text(
                    11, 0.3,
                    self.label_template.format(z_bin['z_min'],
                                               z_bin['z_max']))
                # validation columns: 1=logL, 2=bulge size, 3=disk size,
                # 4/5=their errors (linear, not log, in this branch)
                ax.semilogy(validation_this[:, 1], validation_this[:, 2],
                            label='Bulge', color=colors[0])
                ax.fill_between(
                    validation_this[:, 1],
                    validation_this[:, 2] + validation_this[:, 4],
                    validation_this[:, 2] - validation_this[:, 4],
                    lw=0, alpha=0.2, facecolor=colors[0])
                # disk points offset by +0.2 in logL to avoid overplotting
                ax.semilogy(validation_this[:, 1] + 0.2,
                            validation_this[:, 3],
                            label='Disk', color=colors[1])
                ax.fill_between(
                    validation_this[:, 1] + 0.2,
                    validation_this[:, 3] + validation_this[:, 5],
                    validation_this[:, 3] - validation_this[:, 5],
                    lw=0, alpha=0.2, facecolor=colors[1])
                ax.errorbar(default_L_bins, binned_bulgesize_kpc,
                            binned_bulgesize_kpc_err, marker='o', ls='',
                            c=colors[0])
                ax.errorbar(default_L_bins + 0.2, binned_disksize_kpc,
                            binned_disksize_kpc_err, marker='o', ls='',
                            c=colors[1])
            ax.set_xlim([9, 13])
            ax.set_ylim([1e-1, 25])
            ax.set_yscale('log', nonposy='clip')
            del catalog_data_this
            # advance to next subplot (row-major over the 2x3 grid)
            col += 1
            if col > 2:
                col = 0
                row += 1
            ax.legend(loc='best')
        # invisible full-figure axes used only to host shared axis labels
        fig.add_subplot(111, frameon=False)
        # hide tick and tick label of the big axes
        plt.tick_params(labelcolor='none', which='both', top='off',
                        bottom='off', left='off', right='off')
        plt.grid(False)
        plt.xlabel(self.fig_xlabel)
        plt.ylabel(self.fig_ylabel)
        fig.subplots_adjust(hspace=0, wspace=0.2)
        fig.suptitle('{} ($M_V$) vs. {}'.format(catalog_name,
                                                self.data_label),
                     fontsize='medium', y=0.93)
    finally:
        # always save and close the figure, even if plotting raised
        fig.savefig(os.path.join(output_dir,
                                 '{:s}.png'.format(self.test_name)),
                    bbox_inches='tight')
        plt.close(fig)
    #TODO: calculate summary statistics
    return TestResult(inspect_only=True)
# Locate the SED lookup tables for cosmoDC2 v1.1.4.
# NOTE: `sed_fit_dir` is expected to hold the parent data directory at this
# point (set earlier in the script, outside this excerpt).
sed_fit_dir = os.path.join(sed_fit_dir, 'DC2/cosmoDC2_v1.1.4/sedLookup')
assert os.path.isdir(sed_fit_dir)
hpid = 10069  # an example healpix pixel that has been fit
sed_fit_name = os.path.join(sed_fit_dir, 'sed_fit_%d.h5' % hpid)
assert os.path.isfile(sed_fit_name)
# load cosmoDC2
cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')
# get galaxy_id and redshift for crossmatching with SED fit files;
# we will also get the magnitudes that should be reproduced
# by our synthetic photometry
# (hp_query makes sure we only load the healpixel we are interested in)
hp_query = GCRQuery('healpix_pixel==%d' % hpid)
cosmoDC2_data = cat.get_quantities([
    'galaxy_id', 'redshift', 'ra', 'dec', 'mag_true_u_lsst',
    'mag_true_g_lsst', 'mag_true_r_lsst', 'mag_true_i_lsst',
    'mag_true_z_lsst', 'mag_true_y_lsst', 'mag_u_lsst', 'mag_g_lsst',
    'mag_r_lsst', 'mag_i_lsst', 'mag_z_lsst', 'mag_y_lsst', 'shear_1',
    'shear_2', 'convergence'
], native_filters=[hp_query])
# make sure cosmoDC2_data is sorted by galaxy_id
# (the SED fit files are keyed/sorted by galaxy_id, so sorting here makes
# the later crossmatch a simple aligned comparison)
sorted_dex = np.argsort(cosmoDC2_data['galaxy_id'])
for colname in cosmoDC2_data.keys():
    cosmoDC2_data[colname] = cosmoDC2_data[colname][sorted_dex]
# read in LSST bandpasses
lsst_bp_dict = sims_photUtils.BandpassDict.loadTotalBandpassesFromFiles()
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    '''
    Compute the conditional luminosity function (CLF) of red galaxies.

    First determines a color-cut threshold from the low-redshift
    (z < 0.2) color distribution, keeping the reddest
    `self.color_cut_fraction` of galaxies.  Then histograms red centrals
    and red satellites in (magnitude, halo mass, redshift) bins and
    normalizes by the number of halos (central counts) per
    (mass, redshift) cell.

    Parameters: catalog_instance (GCR catalog reader), catalog_name
    (str label for the plot), output_dir (str; receives clf.png).
    Returns a TestResult (skipped if the catalog cannot be prepared,
    otherwise inspect_only).
    '''
    prepared = self.prepare_galaxy_catalog(catalog_instance)
    if prepared is None:
        return TestResult(skipped=True)
    absolute_magnitude1_field, absolute_magnitude2_field, quantities_needed = prepared

    # find out color cut threshold from the low-z color distribution
    color = []
    for data in catalog_instance.get_quantities(
            [
                absolute_magnitude1_field, absolute_magnitude2_field,
                'redshift_true'
            ],
            filters=['redshift_true < 0.2'],
            return_iterator=True,
    ):
        color.append(data[absolute_magnitude1_field] -
                     data[absolute_magnitude2_field])
    # keep the reddest `color_cut_fraction` of galaxies: threshold is the
    # (100 * (1 - fraction))-th percentile of the color distribution
    color_cut_percentile_at = 100.0 * (1 - self.color_cut_fraction)
    color_cut_thres = np.percentile(np.concatenate(color),
                                    color_cut_percentile_at)
    del color  # free the concatenated color arrays

    colnames = [absolute_magnitude2_field, 'halo_mass', 'redshift_true']
    bins = (self.magnitude_bins, self.mass_bins, self.z_bins)
    hist_cen = np.zeros(
        (self.n_magnitude_bins, self.n_mass_bins, self.n_z_bins))
    hist_sat = np.zeros_like(hist_cen)

    cen_query = GCRQuery('is_central')
    sat_query = ~GCRQuery('is_central')
    # when available, restrict satellites to those within the virial radius
    if 'r_host' in quantities_needed and 'r_vir' in quantities_needed:
        sat_query &= GCRQuery('r_host < r_vir')

    for data in catalog_instance.get_quantities(
            quantities_needed,
            filters=[
                '{} - {} > {}'.format(absolute_magnitude1_field,
                                      absolute_magnitude2_field,
                                      color_cut_thres)
            ],
            return_iterator=True,
    ):
        cen_mask = cen_query.mask(data)
        sat_mask = sat_query.mask(data)
        # FIX: np.stack requires a sequence; passing a generator raises
        # TypeError on modern NumPy (deprecated in 1.16, error since).
        data = np.stack([data[k] for k in colnames]).T
        hist_cen += np.histogramdd(data[cen_mask], bins)[0]
        hist_sat += np.histogramdd(data[sat_mask], bins)[0]
    data = cen_mask = sat_mask = None  # release per-chunk arrays

    # one central per halo, so the central histogram summed over magnitude
    # gives the halo count in each (mass, redshift) cell
    halo_counts = hist_cen.sum(axis=0)
    clf = dict()
    clf['sat'] = hist_sat / halo_counts
    clf['cen'] = hist_cen / halo_counts
    clf['tot'] = clf['sat'] + clf['cen']

    self.make_plot(clf, catalog_name, os.path.join(output_dir, 'clf.png'))
    return TestResult(inspect_only=True)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    '''
    Accumulate N(z) histograms per magnitude cut and plot them against
    validation fits of the form parameterized by z0.

    Iterates over the catalog in chunks (needed for large catalogs),
    accumulating per-cut redshift histograms and redshift sums, then makes
    one subplot per magnitude cut, overlays validation curves, and writes
    both per-catalog and validation results to text files in output_dir.

    Parameters: catalog_instance (GCR catalog reader), catalog_name (str
    label), output_dir (str).  Returns a TestResult (skipped when required
    quantities are absent; otherwise score 0 / passed).
    '''
    #check catalog data for required quantities
    mag_field = catalog_instance.first_available(*self.possible_mag_fields)
    if not mag_field:
        return TestResult(skipped=True,
                          summary='Missing required mag_field option')
    if not catalog_instance.has_quantity(self.zlabel):
        return TestResult(skipped=True,
                          summary='Missing required {} quantity'.format(
                              self.zlabel))
    # e.g. 'mag_i_lsst' -> filter name 'LSST' (text after the last '_')
    filtername = mag_field.rpartition('_')[-1].upper()
    filelabel = '_'.join((filtername, self.band))

    #setup plots
    fig, ax = plt.subplots(self.nrows, self.ncolumns,
                           figsize=(self.figx_p, self.figy_p), sharex='col')
    catalog_color = next(self.colors)
    catalog_marker = next(self.markers)

    #initialize arrays for storing histogram sums
    # FIX: use builtin `int` — the `np.int` alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    N_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1),
                       dtype=int)
    sumz_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1))

    #get catalog data by looping over data iterator (needed for large catalogs) and aggregate histograms
    for catalog_data in catalog_instance.get_quantities(
            [self.zlabel, mag_field],
            filters=self.filters,
            return_iterator=True):
        # drop rows with non-finite values in any fetched column
        catalog_data = GCRQuery(
            *((np.isfinite, col) for col in catalog_data)).filter(catalog_data)
        for cut_lo, cut_hi, N, sumz in zip_longest(
                self.mag_lo,
                self.mag_hi,
                N_array.reshape(
                    -1, N_array.shape[-1]
                ),  #flatten all but last dimension of array
                sumz_array.reshape(-1, sumz_array.shape[-1]),
        ):
            if cut_lo:
                # select cut_hi <= mag < cut_lo (magnitudes: smaller = brighter)
                mask = (catalog_data[mag_field] < cut_lo)
                if cut_hi:
                    mask &= (catalog_data[mag_field] >= cut_hi)
                z_this = catalog_data[self.zlabel][mask]
                del mask
                #bin catalog_data and accumulate subplot histograms
                # in-place += accumulates directly into N_array/sumz_array
                # because reshape returns views
                N += np.histogram(z_this, bins=self.zbins)[0]
                sumz += np.histogram(z_this, bins=self.zbins,
                                     weights=z_this)[0]

    #loop over magnitude cuts and make plots
    results = {}
    for n, (ax_this, summary_ax_this, cut_lo, cut_hi, N, sumz, z0,
            z0err) in enumerate(
                zip_longest(
                    ax.flat,
                    self.summary_ax.flat,
                    self.mag_lo,
                    self.mag_hi,
                    N_array.reshape(-1, N_array.shape[-1]),
                    sumz_array.reshape(-1, sumz_array.shape[-1]),
                    self.validation_data.get('z0values', []),
                    self.validation_data.get('z0errors', []),
                )):
        if cut_lo is None:  #cut_lo is None if self.mag_lo is exhausted
            # hide unused subplots
            ax_this.set_visible(False)
            summary_ax_this.set_visible(False)
        else:
            cut_label = '{} $< {}$'.format(self.band, cut_lo)
            if cut_hi:
                cut_label = '${} <=$ '.format(
                    cut_hi
                ) + cut_label  #also appears in txt file so don't use \leq
            if z0 is None and 'z0const' in self.validation_data:
                #alternate format for some validation data
                z0 = self.validation_data[
                    'z0const'] + self.validation_data['z0linear'] * cut_lo
            # mean redshift in each z-bin (may contain NaN where N == 0)
            meanz = sumz / N
            sumN = N.sum()
            total = '(# of galaxies = {})'.format(sumN)
            Nerrors = np.sqrt(N)  # Poisson errors
            if self.normed:
                binwidths = self.zbins[1:] - self.zbins[:-1]
                N = N / sumN / binwidths
                Nerrors = Nerrors / sumN / binwidths

            #make subplot
            catalog_label = ' '.join(
                (catalog_name,
                 cut_label.replace(self.band, filtername + ' ' + self.band)))
            validation_label = ' '.join(
                (self.validation_data.get('label', ''), cut_label))
            key = cut_label.replace('$', '')
            results[key] = {
                'meanz': meanz,
                'total': total,
                'N': N,
                'N+-': Nerrors
            }
            self.catalog_subplot(ax_this, meanz, N, Nerrors, catalog_color,
                                 catalog_marker, catalog_label)
            if z0 and z0 > 0.:
                fits = self.validation_subplot(ax_this, meanz, z0, z0err,
                                               validation_label)
                results[key].update(fits)
            self.decorate_subplot(ax_this, n)

            #add curve for this catalog to summary plot
            self.catalog_subplot(summary_ax_this, meanz, N, Nerrors,
                                 catalog_color, catalog_marker,
                                 catalog_label)
            if self.first_pass and z0 and z0 > 0:
                self.validation_subplot(
                    summary_ax_this, meanz, z0, z0err, validation_label
                )  #add validation data if evaluating first catalog
            self.decorate_subplot(summary_ax_this, n)

    #save results for catalog and validation data in txt files
    for filename, dtype, comment, info in zip_longest(
            (filelabel, self.observation), ('N', 'fit'), (filtername, ),
            ('total', )):
        if filename:
            with open(os.path.join(output_dir, 'Nvsz_' + filename + '.txt'),
                      'ab') as f_handle:  #open file in append mode
                #loop over magnitude cuts in results dict
                for key, value in results.items():
                    self.save_quantities(dtype, value, f_handle,
                                         comment=' '.join(
                                             ((comment or ''), key,
                                              value.get(info, ''))))

    if self.first_pass:  #turn off validation data plot in summary for remaining catalogs
        self.first_pass = False

    #make final adjustments to plots and save figure
    self.post_process_plot(fig)
    fig.savefig(os.path.join(output_dir, 'Nvsz_' + filelabel + '.png'))
    plt.close(fig)
    return TestResult(0, passed=True)
def load_catalogs(ra_min=52.3, ra_max=57.9, dec_min=-32.2, dec_max=-27.3):
    """
    Load matched object and truth catalogs inside a coordinate box.

    Applies basic quality filters (clean, finite magnitudes) to the DC2
    run 1.2 object catalog, and a star selection (extendedness == 0 for
    objects, star == 1 for truth).

    Parameters: ra_min/ra_max/dec_min/dec_max (float, degrees) define the
    selection box.

    Returns
    -------
    object_data : tuple (object_all, len_object_all, object_stars, len_object_stars)
    truth_data : tuple (truth_all, len_truth_all, truth_stars, len_truth_stars)
    """
    # coordinate box default is set from looking at plots general_analysis.ipynb
    truth = GCRCatalogs.load_catalog("dc2_truth_run1.2_static")
    objects = GCRCatalogs.load_catalog("dc2_object_run1.2p")

    coord_filter = [
        'ra >= {}'.format(ra_min),
        'ra < {}'.format(ra_max),
        'dec >= {}'.format(dec_min),
        'dec < {}'.format(dec_max),
    ]
    # basic quality cuts; finiteness filters guard against NaN magnitudes
    object_filter = [
        GCRQuery('clean'),
        (np.isfinite, 'mag_r'),
        (np.isfinite, 'magerr_r'),
        (np.isfinite, 'mag_r_cModel'),
        (np.isfinite, 'mag_g'),
        (np.isfinite, 'magerr_g'),
        (np.isfinite, 'extendedness'),
    ]
    #star_thresh = 0.0164 # see object_gcr_1_intro.ipynb
    star_filter = [
        GCRQuery('extendedness == 0'),
        #GCRQuery('mag_r - mag_r_cModel < {}'.format(star_thresh)),
        #GCRQuery('magerr_r < 0.1'),
    ]

    object_all = objects.get_quantities([
        'objectId', 'ra', 'dec', 'mag_r', 'magerr_r', 'mag_r_cModel',
        'mag_g', 'magerr_g', 'mag_g_cModel', 'extendedness'
    ], filters=coord_filter + object_filter)
    object_stars = objects.get_quantities([
        'objectId', 'ra', 'dec', 'mag_r', 'magerr_r', 'mag_r_cModel',
        'mag_g', 'magerr_g', 'mag_g_cModel'
    ], filters=coord_filter + object_filter + star_filter)
    len_object_all = len(object_all['ra'])
    len_object_stars = len(object_stars['ra'])
    object_data = object_all, len_object_all, object_stars, len_object_stars

    truth_filters = [(np.isfinite, 'r'), (np.isfinite, 'g')]
    truth_all = truth.get_quantities(['ra', 'dec', 'mag_true_r', 'mag_true_g'],
                                     native_filters=coord_filter,
                                     filters=truth_filters)
    truth_stars = truth.get_quantities(
        ['ra', 'dec', 'mag_true_r', 'mag_true_g'],
        native_filters=coord_filter + ['star == 1'],
        filters=truth_filters)
    len_truth_all = len(truth_all['ra'])
    len_truth_stars = len(truth_stars['ra'])
    # FIX: pack the star count (len_truth_stars), not len_truth_all twice,
    # mirroring the structure of object_data above.
    truth_data = truth_all, len_truth_all, truth_stars, len_truth_stars

    print("Coadd objects:", len_object_all, ", Coadd stars:",
          len_object_stars)
    print("Truth objects:", len_truth_all, ", Truth stars:",
          len_truth_stars)
    return object_data, truth_data
def validate_agn_mags(cat_dir, obsid, agn_db,
                      opsim_db=os.path.join(
                          '/global/projecta/projectdirs/lsst',
                          'groups/SSim/DC2/',
                          'minion_1016_desc_dithered_v4_sfd.db')):
    """
    Validate AGN variability magnitudes in an instance catalog.

    Crossmatches the AGN in the instance catalog for one pointing against
    the AGN parameter database and cosmoDC2, re-simulates the AGN
    variability delta-magnitudes at the pointing's MJD, and compares them
    to the (magnorm - baseline magNorm) offsets recorded in the instance
    catalog.

    Parameters
    ----------
    cat_dir is the parent dir of $obsid

    obsid is the obsHistID of the pointing (an int)

    agn_db is the database of AGN parameters

    opsim_db is the path to the cadence database

    Raises
    ------
    RuntimeError if any input is missing, if the crossmatch is
    inconsistent, or if any delta-magnitude disagrees by more than 1e-5.
    """
    if not os.path.isfile(agn_db):
        raise RuntimeError('\n%s\nis not a file\n' % agn_db)
    inst_cat_dir = os.path.join(cat_dir, '%.8d' % obsid)
    if not os.path.isdir(inst_cat_dir):
        raise RuntimeError('\n%s\nis not a dir\n' % inst_cat_dir)
    agn_name = os.path.join(inst_cat_dir, 'agn_gal_cat_%d.txt.gz' % obsid)
    if not os.path.isfile(agn_name):
        raise RuntimeError('\n%s\nis not a file\n' % agn_name)
    phosim_name = os.path.join(inst_cat_dir, 'phosim_cat_%d.txt' % obsid)
    # FIX: check the phosim file itself (the original re-checked agn_name,
    # so a missing phosim_cat file slipped past this guard).
    if not os.path.isfile(phosim_name):
        raise RuntimeError('\n%s\nis not a file\n' % phosim_name)

    # scan the phosim header for the filter index and visit time
    bandpass = None
    vistime = None
    with open(phosim_name, 'r') as in_file:
        for line in in_file:
            params = line.strip().split()
            if params[0] == 'filter':
                bandpass = int(params[1])
            elif params[0] == 'vistime':
                vistime = float(params[1])
            if (bandpass is not None and vistime is not None):
                break
    if bandpass is None:
        raise RuntimeError("Did not read bandpass")
    if vistime is None:
        raise RuntimeError("Did not read vistime")

    if not os.path.isfile(opsim_db):
        raise RuntimeError('\n%s\nis not a file' % opsim_db)
    # pull the MJD and dithered pointing for this obsHistID
    with sqlite3.connect(opsim_db) as conn:
        c = conn.cursor()
        r = c.execute('SELECT expMJD, descDitheredRA, descDitheredDec '
                      'FROM Summary WHERE obsHistID==%d' % obsid).fetchall()
        mjd = float(r[0][0])
        pointing_ra = float(r[0][1])
        pointing_dec = float(r[0][2])

    agn_colnames = [
        'obj', 'uniqueID', 'ra', 'dec', 'magnorm', 'sed', 'redshift', 'g1',
        'g2', 'kappa', 'dra', 'ddec', 'src_type', 'dust_rest', 'dust_obs',
        'obs_av', 'obs_rv'
    ]
    agn_col_types = {
        'ra': float,
        'dec': float,
        'magnorm': float,
        'redshift': float,
        'sed': bytes,
        'uniqueID': int
    }
    agn_df = pd.read_csv(agn_name,
                         delimiter=' ',
                         compression='gzip',
                         names=agn_colnames,
                         dtype=agn_col_types,
                         nrows=None)
    # instance-catalog uniqueID encodes galaxy_id in the high bits
    agn_df['galaxy_id'] = pd.Series(agn_df['uniqueID'] // 1024,
                                    index=agn_df.index)

    # unit vector of the pointing (RA/Dec apparently in radians here —
    # dithered values from the Summary table; kept as in the original)
    vv = np.array([
        np.cos(pointing_dec) * np.cos(pointing_ra),
        np.cos(pointing_dec) * np.sin(pointing_ra),
        np.sin(pointing_dec)
    ])
    hp_list = healpy.query_disc(32, vv, np.radians(2.2), nest=False,
                                inclusive=True)

    # stream the AGN parameter DB in chunks, keeping only rows whose
    # galaxy_id appears in the instance catalog
    chunk_size = 10000
    agn_gid = []
    agn_magnorm = []
    agn_varParamStr = []
    with sqlite3.connect(agn_db) as agn_params_conn:
        agn_params_cursor = agn_params_conn.cursor()
        query = 'SELECT galaxy_id, magNorm, varParamStr FROM agn_params'
        agn_query = agn_params_cursor.execute(query)
        agn_chunk = agn_query.fetchmany(size=chunk_size)
        while len(agn_chunk) > 0:
            agn_chunk = np.array(agn_chunk).transpose()
            chunk_gid = agn_chunk[0].astype(int)
            chunk_magnorm = agn_chunk[1].astype(float)
            chunk_varParamStr = agn_chunk[2]
            valid_agn = np.where(
                np.in1d(chunk_gid, agn_df['galaxy_id'].values))
            agn_gid.append(chunk_gid[valid_agn])
            agn_magnorm.append(chunk_magnorm[valid_agn])
            agn_varParamStr.append(chunk_varParamStr[valid_agn])
            agn_chunk = agn_query.fetchmany(size=chunk_size)
    agn_gid = np.concatenate(agn_gid)
    agn_magnorm = np.concatenate(agn_magnorm)
    agn_varParamStr = np.concatenate(agn_varParamStr)
    print('sql gave %d agn' % len(agn_gid))
    # sort every parallel array by galaxy_id for aligned comparison
    sorted_dex = np.argsort(agn_gid)
    agn_gid = agn_gid[sorted_dex]
    agn_magnorm = agn_magnorm[sorted_dex]
    agn_varParamStr = agn_varParamStr[sorted_dex]

    instcat_gid = agn_df['galaxy_id'].values
    instcat_magnorm = agn_df['magnorm'].values
    # galaxy_id >= 1e11 marks sprinkled/special objects; exclude them
    valid = np.where(instcat_gid < 1.0e11)
    instcat_gid = instcat_gid[valid]
    instcat_magnorm = instcat_magnorm[valid]
    sorted_dex = np.argsort(instcat_gid)
    instcat_gid = instcat_gid[sorted_dex]
    instcat_magnorm = instcat_magnorm[sorted_dex]

    # fetch true redshifts from cosmoDC2 for the matched AGN
    cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')
    cat_q = {}
    cat_q['galaxy_id'] = []
    cat_q['redshift_true'] = []
    for hp in hp_list:
        hp_query = GCRQuery('healpix_pixel==%d' % hp)
        local_q = cat.get_quantities(['galaxy_id', 'redshift_true'],
                                     native_filters=[hp_query])
        valid = np.in1d(local_q['galaxy_id'], agn_df['galaxy_id'])
        if valid.any():
            for k in cat_q:
                cat_q[k].append(local_q[k][valid])
    for k in cat_q:
        cat_q[k] = np.concatenate(cat_q[k])
    print('we have %d agn' % len(cat_q['galaxy_id']))
    sorted_dex = np.argsort(cat_q['galaxy_id'])
    for k in cat_q:
        cat_q[k] = cat_q[k][sorted_dex]

    # all three sorted galaxy_id arrays must line up exactly
    if not np.array_equal(cat_q['galaxy_id'], instcat_gid):
        msg = "GCR gid not equal to InstCat\n"
        msg += "len gcr %d\n" % len(cat_q['galaxy_id'])
        msg += "len instcat %d\n" % len(instcat_gid)
        msg += "other comparison %s\n" % str(
            np.array_equal(instcat_gid, agn_gid))
        raise RuntimeError(msg)
    if not np.array_equal(instcat_gid, agn_gid):
        raise RuntimeError("galaxy_id arrays are not equal")
    if len(instcat_gid) == 0:
        raise RuntimeError("no AGN to test")

    # transpose the per-AGN varParamStr JSON dicts into arrays per parameter
    agn_params = None
    for var in agn_varParamStr:
        var_dict = json.loads(var)
        if agn_params is None:
            agn_params = {}
            for k in var_dict['p']:
                agn_params[k] = []
        for k in var_dict['p']:
            agn_params[k].append(var_dict['p'][k])
    for k in agn_params:
        agn_params[k] = np.array(agn_params[k])

    # re-simulate AGN variability at the pointing's MJD
    agn_simulator = ExtraGalacticVariabilityModels()
    agn_simulator._agn_threads = 3
    d_mag = agn_simulator.applyAgn([np.arange(len(agn_gid), dtype=int)],
                                   agn_params,
                                   mjd,
                                   redshift=cat_q['redshift_true'])

    # the instance catalog stores baseline magNorm + variability offset
    d_mag_instcat = instcat_magnorm - agn_magnorm
    error = np.abs(d_mag[bandpass] - d_mag_instcat)
    max_error = error.max()
    violation = np.where(error > 1.0e-5)
    for ii in violation[0]:
        print("%e -- %e %e %e" %
              (error[ii], d_mag[bandpass][ii], d_mag_instcat[ii],
               instcat_magnorm[ii]))
        for k in agn_params:
            print(' %s: %e' % (k, agn_params[k][ii]))
    if max_error > 1.0e-5:
        raise RuntimeError("\n%s\nAGN validation failed: max mag error %e" %
                           (agn_name, max_error))
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    '''
    Compare emission-line luminosity ratios against SDSS data.

    Loads line luminosities for two redshift/magnitude-selected samples,
    converts them to Watts, draws a matched subsample, and delegates the
    statistical comparison and plotting to self.makeplot.

    Parameters: catalog_instance (GCR catalog reader), catalog_name (str),
    output_dir (str, unused here beyond the TestResult contract).
    Returns a TestResult; passes only if the KS p-value exceeds 1e-4 AND
    the total median shift is <= 0.25 dex.
    '''
    #=========================================
    # Begin Reading in Data
    #=========================================

    # check if needed quantities exist
    if not catalog_instance.has_quantities([
            'redshift_true', self.loz_band, self.hiz_band,
            'emissionLines/totalLineLuminosity:oxygenII3726',
            'emissionLines/totalLineLuminosity:oxygenII3729',
            'emissionLines/totalLineLuminosity:balmerAlpha6563',
            'emissionLines/totalLineLuminosity:balmerBeta4861',
            'emissionLines/totalLineLuminosity:nitrogenII6584',
            'emissionLines/totalLineLuminosity:oxygenIII4959',
            'emissionLines/totalLineLuminosity:oxygenIII5007',
            'emissionLines/totalLineLuminosity:sulfurII6716',
            'emissionLines/totalLineLuminosity:sulfurII6731'
    ]):
        return TestResult(skipped=True,
                          summary='Necessary quantities are not present')

    # low-z sample: finite redshift within [loz_lo, loz_hi]
    loz_filter = GCRQuery((np.isfinite, 'redshift_true'),
                          'redshift_true > %f' % self.loz_lo,
                          'redshift_true < %f' % self.loz_hi)
    # high-z sample: finite redshift within [hiz_lo, hiz_hi]
    hiz_filter = GCRQuery((np.isfinite, 'redshift_true'),
                          'redshift_true > %f' % self.hiz_lo,
                          'redshift_true < %f' % self.hiz_hi)
    loz_magcut_filter = GCRQuery(
        (np.isfinite, self.loz_band),
        self.loz_band + ' < %.1f' % self.loz_magcut)
    hiz_magcut_filter = GCRQuery(
        (np.isfinite, self.hiz_band),
        self.hiz_band + ' < %.1f' % self.hiz_magcut)
    # Halpha luminosity floor
    ha_fluxlim = GCRQuery(
        (np.isfinite, 'emissionLines/totalLineLuminosity:balmerAlpha6563'),
        (lambda x: x > self.ha_cut,
         'emissionLines/totalLineLuminosity:balmerAlpha6563'))

    # NOTE(review): `&` binds tighter than `|`, so this filter reads as
    # (loz & loz_magcut) | ((hiz & hiz_magcut) & ha_fluxlim) — the Halpha
    # cut applies ONLY to the high-z branch.  The commented-out post-cut
    # below suggests it may have been intended for both branches; confirm
    # before changing.
    data = catalog_instance.get_quantities(
        [
            'redshift_true',
            'emissionLines/totalLineLuminosity:oxygenII3726',
            'emissionLines/totalLineLuminosity:oxygenII3729',
            'emissionLines/totalLineLuminosity:balmerAlpha6563',
            'emissionLines/totalLineLuminosity:balmerBeta4861',
            'emissionLines/totalLineLuminosity:nitrogenII6584',
            'emissionLines/totalLineLuminosity:oxygenIII4959',
            'emissionLines/totalLineLuminosity:oxygenIII5007',
            'emissionLines/totalLineLuminosity:sulfurII6716',
            'emissionLines/totalLineLuminosity:sulfurII6731',
            self.loz_band, self.hiz_band
        ],
        filters=((loz_filter & loz_magcut_filter)
                 | (hiz_filter & hiz_magcut_filter) & ha_fluxlim))

    # data = data[data['emissionLines/totalLineLuminosity:balmerAlpha6563'] > self.ha_cut]
    z = data['redshift_true']
    # convert line luminosities from solar luminosities to Watts
    # (1 L_sun = 3.839e26 W); .value strips the astropy unit
    Halpha = (data['emissionLines/totalLineLuminosity:balmerAlpha6563'] *
              3.839e26 * u.W).value
    Hbeta = (data['emissionLines/totalLineLuminosity:balmerBeta4861'] *
             3.839e26 * u.W).value
    NII6584 = (data['emissionLines/totalLineLuminosity:nitrogenII6584'] *
               3.839e26 * u.W).value
    OIII5007 = (data['emissionLines/totalLineLuminosity:oxygenIII5007'] *
                3.839e26 * u.W).value
    OIII4959 = (data['emissionLines/totalLineLuminosity:oxygenIII4959'] *
                3.839e26 * u.W).value
    OII3726 = (data['emissionLines/totalLineLuminosity:oxygenII3726'] *
               3.839e26 * u.W).value
    OII3729 = (data['emissionLines/totalLineLuminosity:oxygenII3729'] *
               3.839e26 * u.W).value
    SII6716 = (data['emissionLines/totalLineLuminosity:sulfurII6716'] *
               3.839e26 * u.W).value
    SII6731 = (data['emissionLines/totalLineLuminosity:sulfurII6731'] *
               3.839e26 * u.W).value
    # doublet/multiplet totals
    SIItot = SII6716 + SII6731
    OIIItot = OIII5007 + OIII4959
    OIItot = OII3726 + OII3729

    # Reduce the sample size by drawing self.sim_drawnum galaxies
    # indices = np.random.choice(np.arange(len(Halpha)), size=self.sim_drawnum, replace=False)
    # drawinds matches the simulated redshift distribution to the SDSS one
    indices = self.sdsscat.drawinds(z, size=self.sim_drawnum,
                                    catname=catalog_name)

    # stash the subsampled arrays on self for makeplot to consume
    self.z = z[indices]
    self.ha = Halpha[indices]
    self.hb = Hbeta[indices]
    self.oii = OIItot[indices]
    self.oiii = OIIItot[indices]
    self.nii6584 = NII6584[indices]
    self.oiii5007 = OIII5007[indices]
    self.oiii4959 = OIII4959[indices]
    self.oii3726 = OII3726[indices]
    self.oii3729 = OII3729[indices]
    self.sii6716 = SII6716[indices]
    self.sii6731 = SII6731[indices]
    self.siitot = SIItot[indices]

    #=========================================
    # End Reading in Data
    #=========================================

    #=========================================
    # Perform the Test and Return Results
    #=========================================
    if self.truncate_cat_name:
        thisfig, pvalue, medianshift = self.makeplot(
            catalog_name.split('_')[0])
    else:
        thisfig, pvalue, medianshift = self.makeplot(catalog_name)

    self.figlist.append(thisfig)
    self.runcat_name.append(catalog_name)

    # pass criteria: p-value >= 1e-4 AND median shift <= 0.25 dex
    if np.log10(pvalue) >= -4. and np.linalg.norm(medianshift) <= 0.25:
        return TestResult(pvalue, passed=True)
    elif np.linalg.norm(medianshift) <= 0.25:
        return TestResult(pvalue,
                          passed=False,
                          summary='P-value must exceed 1e-4.')
    elif np.log10(pvalue) >= -4.:
        return TestResult(
            pvalue,
            passed=False,
            summary=
            'Total median shift must be less than or equal to 0.25 dex.')
    else:
        return TestResult(
            pvalue,
            passed=False,
            summary=
            'P-value must exceed 1e-4 and total median shift must be less than or equal to 0.25 dex.'
        )
def validate_instance_catalog_magnitudes(cat_dir, obsid, seed=99, nrows=-1):
    """
    Validate galaxy magnitudes in an instance catalog against cosmoDC2.

    Joins the disk/bulge/knots instance-catalog components into one galaxy
    table, excludes sprinkled objects, crossmatches against cosmoDC2 true
    magnitudes in the pointing's footprint, and farms out the per-galaxy
    synthetic-photometry comparison to `validate_batch` worker processes.

    Parameters
    ----------
    cat_dir is the parent dir of $obsid

    obsid is the obsHistID of the pointing

    seed is the seed for a random number generator

    nrows is the number of galaxies to test (if <0, test all of them)

    Raises
    ------
    RuntimeError if inputs are missing or if any galaxy's magnitude
    disagrees by more than 1e-5.
    """
    # load the galaxy_ids of sprinkled (Twinkles) AGN/SNe hosts, which
    # must be excluded from the comparison
    agn_dtype = np.dtype([('galaxy_id', int), ('twinkles_id', int)])
    agn_cache = np.genfromtxt(os.path.join(os.environ['TWINKLES_DIR'],
                                           'data',
                                           'cosmoDC2_v1.1.4_agn_cache.csv'),
                              dtype=agn_dtype,
                              delimiter=',',
                              skip_header=1)
    sne_cache = np.genfromtxt(os.path.join(os.environ['TWINKLES_DIR'],
                                           'data',
                                           'cosmoDC2_v1.1.4_sne_cache.csv'),
                              dtype=agn_dtype,
                              delimiter=',',
                              skip_header=1)
    sprinkled_gid = np.append(agn_cache['galaxy_id'],
                              sne_cache['galaxy_id'])

    colnames = [
        'obj', 'uniqueID', 'ra', 'dec', 'magnorm', 'sed', 'redshift', 'g1',
        'g2', 'kappa', 'dra', 'ddec', 'src_type', 'major', 'minor',
        'positionAngle', 'sindex', 'dust_rest', 'rest_av', 'rest_rv',
        'dust_obs', 'obs_av', 'obs_rv'
    ]
    to_drop = [
        'obj', 'g1', 'g2', 'kappa', 'dra', 'ddec', 'src_type', 'major',
        'minor', 'positionAngle', 'sindex', 'dust_rest', 'dust_obs'
    ]
    col_types = {
        'magnorm': float,
        'redshift': float,
        'rest_av': float,
        'rest_rv': float,
        'sed': bytes,
        'uniqueID': int
    }

    assert os.path.isdir(cat_dir)
    data_dir = os.path.join(cat_dir, '%.8d' % obsid)
    if not os.path.isdir(data_dir):
        raise RuntimeError('\n\n%s\nis not a dir\n\n' % data_dir)

    phosim_file = os.path.join(data_dir, 'phosim_cat_%d.txt' % obsid)
    assert os.path.isfile(phosim_file)
    # read the filter index from the phosim header and map it to a band name
    bandpass_name = None
    bandpass_name_list = 'ugrizy'
    with open(phosim_file, 'r') as in_file:
        for line in in_file:
            params = line.strip().split()
            if params[0] == 'filter':
                bandpass_name = bandpass_name_list[int(params[1])]
    assert bandpass_name is not None
    (tot_dict, hw_dict) = BandpassDict.loadBandpassesFromFiles()
    bandpass = hw_dict[bandpass_name]

    disk_file = os.path.join(data_dir, 'disk_gal_cat_%d.txt.gz' % obsid)
    if not os.path.isfile(disk_file):
        raise RuntimeError("%s is not a file" % disk_file)
    bulge_file = os.path.join(data_dir, 'bulge_gal_cat_%d.txt.gz' % obsid)
    assert os.path.isfile(bulge_file)
    knots_file = os.path.join(data_dir, 'knots_cat_%d.txt.gz' % obsid)
    assert os.path.isfile(knots_file)

    print('reading disks')
    disk_df = pd.read_csv(disk_file,
                          delimiter=' ',
                          compression='gzip',
                          names=colnames,
                          dtype=col_types,
                          nrows=None)
    disk_df.drop(labels=to_drop, axis='columns', inplace=True)
    print('read disks')
    # uniqueID encodes galaxy_id in the high bits
    disk_df['galaxy_id'] = pd.Series(disk_df['uniqueID'] // 1024,
                                     index=disk_df.index)
    disk_df = disk_df.set_index('galaxy_id')

    print('reading bulges')
    bulge_df = pd.read_csv(bulge_file,
                           delimiter=' ',
                           compression='gzip',
                           names=colnames,
                           dtype=col_types,
                           nrows=None)
    bulge_df.drop(labels=to_drop, axis='columns', inplace=True)
    print('read bulges')
    bulge_df['galaxy_id'] = pd.Series(bulge_df['uniqueID'] // 1024,
                                      index=bulge_df.index)
    bulge_df = bulge_df.set_index('galaxy_id')

    # rename columns with a _knots suffix before reading the knots file so
    # the later outer join does not collide with disk/bulge columns
    for ii in range(len(colnames)):
        colnames[ii] = colnames[ii] + '_knots'
    for ii in range(len(to_drop)):
        to_drop[ii] = to_drop[ii] + '_knots'
    print('reading knots')
    knots_df = pd.read_csv(knots_file,
                           delimiter=' ',
                           compression='gzip',
                           names=colnames,
                           dtype=col_types,
                           nrows=None)
    knots_df.drop(labels=to_drop, axis='columns', inplace=True)
    print('read knots')
    knots_df['galaxy_id'] = pd.Series(knots_df['uniqueID_knots'] // 1024,
                                      index=knots_df.index)
    knots_df = knots_df.set_index('galaxy_id')

    # outer-join the three components on galaxy_id into one table
    wanted_col = [
        'sed', 'magnorm', 'redshift', 'rest_av', 'rest_rv', 'ra', 'dec'
    ]
    galaxy_df = disk_df[wanted_col].join(bulge_df[wanted_col],
                                         how='outer',
                                         lsuffix='_disk',
                                         rsuffix='_bulge')
    for ii in range(len(wanted_col)):
        wanted_col[ii] = wanted_col[ii] + '_knots'
    galaxy_df = galaxy_df.join(knots_df[wanted_col],
                               how='outer',
                               rsuffix='_knots')

    # drop sprinkled galaxies
    valid_galaxies = np.where(
        np.logical_not(np.in1d(galaxy_df.index, sprinkled_gid)))
    galaxy_df = galaxy_df.iloc[valid_galaxies]

    # estimate the field center/radius from the disk positions and find the
    # cosmoDC2 healpixels covering that footprint
    ra_center = np.nanmedian(galaxy_df['ra_disk'].values)
    dec_center = np.nanmedian(galaxy_df['dec_disk'].values)
    dd = angularSeparation(ra_center, dec_center,
                           galaxy_df['ra_disk'].values,
                           galaxy_df['dec_disk'].values)
    radius_deg = np.nanmax(dd)
    ra_rad = np.radians(ra_center)
    dec_rad = np.radians(dec_center)
    vv = np.array([
        np.cos(ra_rad) * np.cos(dec_rad),
        np.sin(ra_rad) * np.cos(dec_rad),
        np.sin(dec_rad)
    ])
    healpix_list = healpy.query_disc(32,
                                     vv,
                                     np.radians(radius_deg),
                                     nest=False,
                                     inclusive=True)

    gal_id_values = galaxy_df.index.values

    cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')
    cat_qties = {}
    cat_qties['galaxy_id'] = []
    cat_qties['ra'] = []
    cat_qties['dec'] = []
    for hp in healpix_list:
        hp_query = GCRQuery('healpix_pixel==%d' % hp)
        local_qties = cat.get_quantities(['galaxy_id', 'ra', 'dec'],
                                         native_filters=[hp_query])
        valid = np.in1d(local_qties['galaxy_id'], gal_id_values)
        if valid.any():
            for k in local_qties:
                cat_qties[k].append(local_qties[k][valid])
    for k in cat_qties:
        cat_qties[k] = np.concatenate(cat_qties[k])

    cat_dexes = np.arange(len(cat_qties['galaxy_id']), dtype=int)

    # optionally subsample to nrows galaxies for speed
    if nrows > 0:
        rng = np.random.RandomState(seed)
        dexes = rng.choice(galaxy_df.index.values, size=nrows,
                           replace=False)
        galaxy_df = galaxy_df.loc[dexes]
    galaxy_df = galaxy_df.sort_index()

    # NOTE(review): `invalid_knots` is computed but never used below —
    # possibly intended to screen NaN knot magnorms; confirm before removal.
    # FIX: use builtin `float` — the `np.float` alias was removed in
    # NumPy 1.24, so the original raised AttributeError on modern NumPy.
    invalid_knots = np.where(
        np.logical_not(
            np.isfinite(galaxy_df['magnorm_knots'].values.astype(float))))

    # restrict the cosmoDC2 sample to the footprint (+small margin) and
    # align it index-for-index with galaxy_df via sorted galaxy_id
    dd = angularSeparation(ra_center, dec_center, cat_qties['ra'],
                           cat_qties['dec'])
    dd_cut = np.where(dd < (radius_deg + 0.05))
    gid = cat_qties['galaxy_id'][dd_cut]
    cat_dexes = cat_dexes[dd_cut]

    in1d_valid_dexes = np.where(
        np.in1d(gid, galaxy_df.index.values, assume_unique=True))
    valid_dexes = cat_dexes[in1d_valid_dexes]
    gid = gid[in1d_valid_dexes]

    sorted_dex = np.argsort(gid)
    valid_dexes = valid_dexes[sorted_dex]

    assert len(gid) == len(galaxy_df.index.values)
    np.testing.assert_array_equal(gid[sorted_dex], galaxy_df.index.values)

    mag_name = 'mag_true_%s_lsst' % bandpass_name
    qties = {}
    qties['galaxy_id'] = []
    qties[mag_name] = []
    for hp in healpix_list:
        hp_query = GCRQuery('healpix_pixel==%d' % hp)
        local_qties = cat.get_quantities(['galaxy_id', mag_name],
                                         native_filters=[hp_query])
        valid = np.in1d(local_qties['galaxy_id'], gal_id_values)
        if valid.any():
            for k in local_qties:
                qties[k].append(local_qties[k][valid])
    for k in qties:
        qties[k] = np.concatenate(qties[k])
    np.testing.assert_array_equal(qties['galaxy_id'],
                                  cat_qties['galaxy_id'])

    mags = qties[mag_name][valid_dexes]
    gid = qties['galaxy_id'][valid_dexes]
    assert len(gid) == len(mags)
    assert len(mags) > 0
    if nrows > 0:
        assert len(mags) == nrows

    # fan the per-galaxy photometry check out over worker processes;
    # results are gathered in a Manager dict keyed per batch
    n_proc = 3
    # FIX: guard against len(gid) < n_proc, which made d_proc == 0 and
    # range(..., step=0) raise ValueError.
    d_proc = max(1, len(gid) // n_proc)
    mgr = multiprocessing.Manager()
    out_dict = mgr.dict()
    p_list = []
    for i_start in range(0, len(gid), d_proc):
        mag_true = mags[i_start:i_start + d_proc]
        galaxy_arr = galaxy_df.iloc[i_start:i_start + d_proc]
        p = multiprocessing.Process(target=validate_batch,
                                    args=(mag_true, galaxy_arr, bandpass,
                                          out_dict))
        p.start()
        p_list.append(p)
    for p in p_list:
        p.join()
    assert len(list(out_dict.keys())) > 0

    d_mag_max = 0.0
    for k in out_dict.keys():
        if out_dict[k] > d_mag_max:
            d_mag_max = out_dict[k]
    if d_mag_max > 1.0e-5:
        # FIX: typo in error message ("magnitud" -> "magnitude")
        raise RuntimeError("\nobsHistID failed magnitude validation\n"
                           "d_mag_max %e" % d_mag_max)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    """
    Run the color-distribution validation on one mock catalog.

    Picks the first available magnitude quantity per band, optionally
    applies a photometric color transformation, computes the mock color
    distributions, compares them to the observed distributions via the
    Cramer-von Mises statistic, and writes plots plus a text summary.

    Parameters
    ----------
    catalog_instance : GCR catalog reader
    catalog_name : str
        Display name used in plot labels and output files.
    output_dir : str
        Directory for plots and the summary text file.

    Returns
    -------
    TestResult
        Either a skip (missing quantities) or an inspect-only result.
    """
    # Every band that appears in any requested color, e.g. 'g-r' -> {g, r}.
    bands = set(sum((c.split('-') for c in self.colors), []))
    if self.rest_frame:
        possible_names = ('Mag_{}_lsst', 'Mag_{}_sdss', 'Mag_true_{}_lsst_z0',
                          'Mag_true_{}_sdss_z0')
    else:
        possible_lsst_names = (('mag_{}_noagn_lsst', 'mag_true_{}_noagn_lsst')
                               if self.exclude_agn else
                               ('mag_{}_cModel', 'mag_{}_lsst',
                                'mag_true_{}_lsst'))
        possible_non_lsst_names = ('mag_{}_sdss', 'mag_{}_des',
                                   'mag_true_{}_sdss', 'mag_true_{}_des')
        # Preference order decides which filter system wins when both exist.
        if self.use_lsst:
            print('Selecting lsst magnitudes if available')
            possible_names = possible_lsst_names + possible_non_lsst_names
        else:
            possible_names = possible_non_lsst_names + possible_lsst_names
    # Map band -> first available catalog quantity name (None dropped below).
    labels = {
        band: catalog_instance.first_available(*(n.format(band)
                                                 for n in possible_names))
        for band in bands
    }
    labels = {k: v for k, v in labels.items() if v}
    if len(labels) < 2:
        return TestResult(
            skipped=True,
            summary=
            'magnitudes in mock catalog do not have at least two needed bands.'
        )
    # The filter system is encoded in the quantity name; 'z0' names carry an
    # extra suffix, hence the -2 index.
    filters = set(
        (v.split('_')[(-2 if 'z0' in v else -1)] for v in labels.values()))
    if len(filters) > 1:
        return TestResult(
            skipped=True,
            summary='magnitudes in mock catalog have mixed filters.')
    filter_this = filters.pop()
    if self.lightcone:
        labels['redshift'] = catalog_instance.first_available(
            'redshift_true_galaxy', 'redshift_true', 'redshift')
        if not labels['redshift']:
            return TestResult(skipped=True,
                              summary='mock catalog does not have redshift.')
        # Load mock catalog data restricted to the requested redshift slice.
        filters = [
            '{} > {}'.format(labels['redshift'], self.zlo),
            '{} < {}'.format(labels['redshift'], self.zhi)
        ]
    else:
        filters = None
        redshift = catalog_instance.redshift
    data = catalog_instance.get_quantities(list(labels.values()), filters)
    # filter catalog data further for matched object catalogs
    # NOTE(review): when self.lightcone is False, labels has no 'redshift'
    # key, so this lookup raises KeyError — presumably this path is only
    # exercised for lightcone catalogs; verify against callers.
    # NOTE(review): np.ma.getmask returns True for *masked* (invalid)
    # entries; indexing with galmask keeps the masked rows, which looks
    # inverted (~galmask would keep valid rows) — confirm intent.
    if np.ma.isMaskedArray(data[labels['redshift']]):
        galmask = np.ma.getmask(data[labels['redshift']])
        data = {k: data[v][galmask] for k, v in labels.items()}
    else:
        data = {k: data[v] for k, v in labels.items()}
    # Color transformation: map mock magnitudes into the validation
    # catalog's photometric system where a transformation is defined.
    color_trans = None
    if self.color_transformation_q:
        color_trans_name = None
        if self.validation_catalog == 'DEEP2' and (filter_this == 'sdss'
                                                   or filter_this == 'des'):
            color_trans_name = '{}2cfht'.format(filter_this)
        elif self.validation_catalog == 'SDSS' and filter_this == 'des':
            color_trans_name = 'des2sdss'
        if color_trans_name:
            color_trans = color_transformation[color_trans_name]
    filter_title = r'\mathrm{{{}}}'.format(filter_this.upper())
    if color_trans:
        data_transformed = {}
        for band in bands:
            # ne.evaluate applies the transformation expression; bands with
            # no defined expression are simply skipped.
            try:
                data_transformed[band] = ne.evaluate(color_trans[band],
                                                     local_dict=data,
                                                     global_dict={})
            except KeyError:
                continue
        filter_title = (r'{}\rightarrow\mathrm{{{}}}'.format(
            filter_title, self.validation_catalog)
                        if data_transformed else filter_title)
        data_transformed['redshift'] = data['redshift']
        data = data_transformed
        del data_transformed
    # Apply the r-band magnitude cut appropriate to the frame in use.
    if self.obs_r_mag_limit and not self.rest_frame:
        data = GCRQuery('r < {}'.format(self.obs_r_mag_limit)).filter(data)
    elif self.Mag_r_limit and self.rest_frame:
        data = GCRQuery('r < {}'.format(self.Mag_r_limit)).filter(data)
    # Compute color distribution (PDF, CDF etc.)
    mock_color_dist = self.get_color_dist(data)
    # Calculate Cramer-von Mises statistic, both as-is and after shifting
    # the mock distribution by the median offset from the observations.
    color_shift = {}
    cvm_omega = {}
    cvm_omega_shift = {}
    if self.validation_catalog:
        for color in self.colors:
            if not ((color in self.obs_color_dist)
                    and (color in mock_color_dist)):
                continue
            color_shift[color] = self.obs_color_dist[color][
                'median'] - mock_color_dist[color]['median']
            cvm_omega[color] = CvM_statistic(
                mock_color_dist[color]['nsample'],
                self.obs_color_dist[color]['nsample'],
                mock_color_dist[color]['binctr'],
                mock_color_dist[color]['cdf'],
                self.obs_color_dist[color]['binctr'],
                self.obs_color_dist[color]['cdf'])
            cvm_omega_shift[color] = CvM_statistic(
                mock_color_dist[color]['nsample'],
                self.obs_color_dist[color]['nsample'],
                mock_color_dist[color]['binctr'] + color_shift[color],
                mock_color_dist[color]['cdf'],
                self.obs_color_dist[color]['binctr'],
                self.obs_color_dist[color]['cdf'])
    redshift_title = '{:.2f} < z < {:.2f}'.format(
        self.zlo, self.zhi) if self.lightcone else 'z = {:.2f}'.format(redshift)
    catalog_color = next(self.plot_colors)
    # Per-catalog plot, then the same curves added to the summary figure.
    self.make_plots(mock_color_dist, color_shift, cvm_omega, cvm_omega_shift,
                    catalog_name, output_dir, filter_title, redshift_title,
                    catalog_color)
    self.make_plots(mock_color_dist, color_shift, cvm_omega, cvm_omega_shift,
                    catalog_name, output_dir, filter_title, redshift_title,
                    catalog_color, summary=True)
    # Write to summary file
    fn = os.path.join(output_dir, self.summary_output_file)
    with open(fn, 'a') as f:
        if color_trans:
            f.write('Color transformation: {}\n'.format(color_trans_name))
        else:
            f.write('No color transformation\n')
        f.write('{}\n'.format(redshift_title))
        if self.obs_r_mag_limit:
            f.write('r_mag < %2.3f\n\n' % (self.obs_r_mag_limit))
        elif self.Mag_r_limit:
            f.write('Mag_r < %2.3f\n\n' % (self.Mag_r_limit))
        if self.validation_catalog:
            for color in self.colors:
                if self.validation_catalog and not (
                        (color in self.obs_color_dist)
                        and (color in mock_color_dist)):
                    continue
                f.write("Median " + color +
                        " difference (obs - mock) = %2.3f\n" %
                        (color_shift[color]))
                f.write(color + ": {} = {:2.6f}\n".format(
                    'CvM statistic', cvm_omega[color]))
                f.write(color + " (shifted): {} = {:2.6f}\n".format(
                    'CvM statistic', cvm_omega_shift[color]))
                f.write("\n")
    return TestResult(inspect_only=True)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    """
    Run the N(z) validation test on one mock catalog.

    Accumulates redshift histograms per magnitude cut (streaming over the
    catalog iterator so large catalogs fit in memory), optionally computes
    jackknife covariances, compares each histogram against the validation
    fit, writes per-cut results and plots, and returns the average
    chi-square/dof score.

    Parameters
    ----------
    catalog_instance : GCR catalog reader
    catalog_name : str
        Display name; may be truncated/renamed per configuration.
    output_dir : str
        Directory receiving the plot and text output.

    Returns
    -------
    TestResult
        Skip result if required quantities are missing, otherwise
        passed iff the mean score is below self.pass_limit.
    """
    #check catalog data for required quantities
    mag_field = catalog_instance.first_available(*self.possible_mag_fields)
    if not mag_field:
        return TestResult(skipped=True,
                          summary='Missing required mag_field option')
    self.zlabel = catalog_instance.first_available(*self.possible_redshifts)
    if not self.zlabel:
        return TestResult(skipped=True,
                          summary='Missing required redhsift option')
    # Keep only objects inside the configured redshift window.
    self.filters = [(lambda z: (z > self.zlo) & (z < self.zhi), self.zlabel)]
    jackknife_quantities = [self.zlabel, self.ra, self.dec
                            ] if self.jackknife else [self.zlabel]
    for jq in jackknife_quantities:
        if not catalog_instance.has_quantity(jq):
            return TestResult(
                skipped=True,
                summary='Missing required {} quantity'.format(jq))
    required_quantities = jackknife_quantities + [mag_field]
    filtername = mag_field.split('_')[(-1 if mag_field.startswith('m') else
                                       -2)].upper()  #extract filtername
    filelabel = '_'.join((filtername, self.band))
    #setup plots
    if self.truncate_cat_name:
        catalog_name = re.split('_', catalog_name)[0]
    if self.replace_cat_name:
        for k, v in self.replace_cat_name.items():
            catalog_name = re.sub(k, v, catalog_name)
    fig, ax = plt.subplots(self.nrows, self.ncolumns,
                           figsize=(self.figx_p, self.figy_p), sharex='col')
    catalog_color = next(self.colors)
    catalog_marker = next(self.markers)
    #initialize arrays for storing histogram sums
    # FIX: was dtype=np.int — the np.int alias was deprecated in NumPy 1.20
    # and removed in 1.24, raising AttributeError; the builtin int is the
    # documented replacement and is what np.int aliased.
    N_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1),
                       dtype=int)
    sumz_array = np.zeros((self.nrows, self.ncolumns, len(self.zbins) - 1))
    jackknife_data = {}
    #get catalog data by looping over data iterator (needed for large
    #catalogs) and aggregate histograms
    for catalog_data in catalog_instance.get_quantities(
            required_quantities, filters=self.filters, return_iterator=True):
        # Drop any row with a non-finite value in any requested column.
        catalog_data = GCRQuery(
            *((np.isfinite, col) for col in catalog_data)).filter(catalog_data)
        # filter catalog data further for matched object catalogs
        # NOTE(review): np.ma.getmask marks masked entries True, so
        # indexing with galmask keeps the masked rows — confirm intent.
        if np.ma.isMaskedArray(catalog_data[self.zlabel]):
            galmask = np.ma.getmask(catalog_data[self.zlabel])
            catalog_data = {k: v[galmask] for k, v in catalog_data.items()}
        for n, (cut_lo, cut_hi, N, sumz) in enumerate(
                zip_longest(
                    self.mag_lo,
                    self.mag_hi,
                    N_array.reshape(
                        -1, N_array.shape[-1]
                    ),  #flatten all but last dimension of array
                    sumz_array.reshape(-1, sumz_array.shape[-1]),
                )):
            if cut_lo:
                mask = (catalog_data[mag_field] < cut_lo)
                if cut_hi:
                    mask &= (catalog_data[mag_field] >= cut_hi)
                z_this = catalog_data[self.zlabel][mask]
                #save data for jackknife errors
                if self.jackknife:
                    #store all the jackknife data in numpy arrays for
                    #later processing
                    if str(n) not in jackknife_data.keys():
                        #initialize sub-dict; zip truncates to the length
                        #of jackknife_quantities, so mag_field gets no slot
                        jackknife_data[str(n)] = dict(
                            zip(required_quantities, [
                                np.asarray([]) for jq in jackknife_quantities
                            ]))
                    for jkey in jackknife_data[str(n)].keys():
                        jackknife_data[str(n)][jkey] = np.hstack(
                            (jackknife_data[str(n)][jkey],
                             catalog_data[jkey][mask]))
                del mask
                #bin catalog_data and accumulate subplot histograms
                N += np.histogram(z_this, bins=self.zbins)[0]
                sumz += np.histogram(z_this, bins=self.zbins,
                                     weights=z_this)[0]
    #loop over magnitude cuts and make plots
    results = {}
    scores = np.array([self.pass_limit] * self.nplots)
    for n, (ax_this, summary_ax_this, cut_lo, cut_hi, N, sumz, z0,
            z0err) in enumerate(
                zip_longest(
                    ax.flat,
                    self.summary_ax.flat,
                    self.mag_lo,
                    self.mag_hi,
                    N_array.reshape(-1, N_array.shape[-1]),
                    sumz_array.reshape(-1, sumz_array.shape[-1]),
                    self.validation_data.get('z0values', []),
                    self.validation_data.get('z0errors', []),
                )):
        if cut_lo is None:  #cut_lo is None if self.mag_lo is exhausted
            # Hide unused subplot panels.
            if ax_this is not None:
                ax_this.set_visible(False)
            if summary_ax_this is not None:
                summary_ax_this.set_visible(False)
        else:
            cut_label = '{} $< {}$'.format(self.band, cut_lo)
            if cut_hi:
                cut_label = '${} \\leq $ {}'.format(
                    cut_hi, cut_label)  #also appears in txt file
            if z0 is None and 'z0const' in self.validation_data:
                #alternate format for some validation data
                z0 = self.validation_data[
                    'z0const'] + self.validation_data['z0linear'] * cut_lo
            N = N.astype(np.float64)
            if self.jackknife:
                covariance = self.get_jackknife_errors(
                    self.N_jack, jackknife_data[str(n)], N)
            else:
                # Poisson errors when no jackknife is requested.
                covariance = np.diag(N)
            meanz = sumz / N
            sumN = N.sum()
            total = '(# of galaxies = {})'.format(sumN)
            if self.normed:
                # Normalize to a PDF over the z bins; covariance scales
                # quadratically.
                scale = sumN * (self.zbins[1:] - self.zbins[:-1])
                N /= scale
                covariance /= np.outer(scale, scale)
            Nerrors = np.sqrt(np.diag(covariance))
            #make subplot
            catalog_label = ' '.join(
                (catalog_name,
                 cut_label.replace(self.band, filtername + ' ' + self.band)))
            validation_label = ' '.join(
                (self.validation_data.get('label', ''), cut_label))
            key = cut_label.replace('$', '').replace('\\leq', '<=')
            results[key] = {
                'meanz': meanz,
                'total': total,
                'N': N,
                'N+-': Nerrors
            }
            self.catalog_subplot(ax_this, meanz, N, Nerrors, catalog_color,
                                 catalog_marker, catalog_label)
            if z0 and z0 > 0:  # has validation data
                fits = self.validation_subplot(ax_this, meanz, z0, z0err,
                                               validation_label)
                results[key].update(fits)
                scores[n], inverse_cov = self.get_score(
                    N,
                    fits['fit'],
                    covariance,
                    use_diagonal_only=self.use_diagonal_only)
                results[key]['score'] = 'Chi_sq/dof = {:11.4g}'.format(
                    scores[n])
                if self.jackknife:
                    results[key]['inverse_cov_matrix'] = inverse_cov
            self.decorate_subplot(ax_this, n)
            #add curve for this catalog to summary plot
            self.catalog_subplot(summary_ax_this, meanz, N, Nerrors,
                                 catalog_color, catalog_marker, catalog_label)
            if self.first_pass and z0 and z0 > 0:
                self.validation_subplot(
                    summary_ax_this, meanz, z0, z0err, validation_label
                )  #add validation data if evaluating first catalog
            self.decorate_subplot(summary_ax_this, n)
    #save results for catalog and validation data in txt files
    for filename, dtype, comment, info, info2 in zip_longest(
            (filelabel, self.observation), ('N', 'fit'), (filtername, ),
            ('total', 'z0'), ('score', 'z0err')):
        if filename:
            with open(os.path.join(output_dir, 'Nvsz_' + filename + '.txt'),
                      'ab') as f_handle:  #open file in append binary mode
                #loop over magnitude cuts in results dict
                for key, value in results.items():
                    self.save_quantities(dtype,
                                         value,
                                         f_handle,
                                         comment=' '.join(
                                             ((comment or ''), key,
                                              value.get(info, ''),
                                              value.get(info2, ''))))
            if self.jackknife:
                with open(
                        os.path.join(output_dir,
                                     'Nvsz_' + filename + '.txt'),
                        'a') as f_handle:  #open file in append mode
                    f_handle.write('\nInverse Covariance Matrices:\n')
                    for key in results.keys():
                        self.save_matrix(results[key]['inverse_cov_matrix'],
                                         f_handle,
                                         comment=key)
    if self.first_pass:  #turn off validation data plot in summary for remaining catalogs
        self.first_pass = False
    #make final adjustments to plots and save figure
    self.post_process_plot(fig)
    fig.savefig(os.path.join(output_dir, 'Nvsz_' + filelabel + '.png'))
    plt.close(fig)
    #compute final score
    #final_scores = (scores < self.pass_limit)
    #pass or fail on average score rather than demanding that all distributions pass
    score_ave = np.mean(scores)
    return TestResult(score_ave, passed=score_ave < self.pass_limit)
0, '/global/homes/i/ihasan/python_stuff/lib/python3.7/site-packages/') from pzblend import PhotozBlend #sys.path.insert(0,"/global/cfs/cdirs/lsst/groups/PZ/PhotoZDC2/run2.2i_dr6_test/gcr-catalogs/lib/python3.7/site-packages/GCRCatalogs-0.18.1-py3.7.egg") #sys.path.insert(0,'/global/homes/i/ihasan/python_stuff/lib/python3.7/site-packages/') import GCRCatalogs from GCR import GCRQuery object_cat = GCRCatalogs.load_catalog('dc2_object_run2.2i_dr6a_with_photoz') tract_ids = [ 2731, 2904, 2906, 3081, 3082, 3084, 3262, 3263, 3265, 3448, 3450, 3831, 3832, 3834, 4029, 4030, 4031, 2905, 3083, 3264, 3449, 3833 ] basic_cuts = [ GCRQuery('extendedness > 0'), # Extended objects GCRQuery( (np.isfinite, 'mag_i')), # Select objects that have i-band magnitudes GCRQuery( 'clean' ), # The source has no flagged pixels (interpolated, saturated, edge, clipped...) # and was not skipped by the deblender GCRQuery('xy_flag == 0'), # Bad centroiding GCRQuery('snr_i_cModel >= 10'), GCRQuery('detect_isPrimary'), # (from this and below) basic flag cuts ~GCRQuery('deblend_skipped'), ~GCRQuery('base_PixelFlags_flag_edge'), ~GCRQuery('base_PixelFlags_flag_interpolatedCenter'), ~GCRQuery('base_PixelFlags_flag_saturatedCenter'), ~GCRQuery('base_PixelFlags_flag_crCenter'), ~GCRQuery('base_PixelFlags_flag_bad'),
plt.legend(loc='best', framealpha=0.3) plt.xlabel('z') plt.ylabel('M ') #plt.title('Halo ID: {}\nHalo Mass: {:.2e} h^-1 Msun'.format(cluster['halo_id'], cluster['halo_mass'])) plt.savefig(outpath + "mass_redshift.png", bbox_inches='tight') plt.close() print('********Plot saved********') #number of clusters to debug nmax = 3 cluster_data = Table(cluster_data) for i, cluster in enumerate(cluster_data): if (i >= nmax): break # plot only the first 3 members = GCRQuery('halo_id == {}'.format( cluster['halo_id'])).filter(galaxy_data) plt.figure() plt.scatter(members['ra'], members['dec'], s=(24 - members['mag_i']) * 8, label='Galaxy Members [{}]'.format(len(members['ra']))) plt.plot(cluster['ra'], cluster['dec'], 'xr', label='Cluster Center') plt.legend(loc='best', framealpha=0.3) plt.xlabel(r'ra [deg]') plt.ylabel(r'dec [deg]') plt.title('Halo ID: {}\nHalo Mass: {:.2e} h^-1 Msun'.format( cluster['halo_id'], cluster['halo_mass'])) plt.savefig(outpath + format(cluster['halo_id']) + ".png", bbox_inches='tight') plt.close() print('********Plot saved********')
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
    '''
    Loop over magnitude cuts and make plots

    Measures the two-point correlation function (angular, or projected wp
    when self.need_distance) per magnitude bin with treecorr using the
    Landy-Szalay estimator, overplots the validation data, and writes the
    per-bin correlation output files.

    Returns a skip TestResult if required quantities are missing,
    otherwise an inspect-only TestResult.
    '''
    # load catalog data
    colnames = dict()
    colnames['z'] = catalog_instance.first_available(
        'redshift', 'redshift_true')
    colnames['ra'] = catalog_instance.first_available('ra', 'ra_true')
    colnames['dec'] = catalog_instance.first_available('dec', 'dec_true')
    colnames['mag'] = catalog_instance.first_available(
        *self.possible_mag_fields)
    if not all(v for v in colnames.values()):
        return TestResult(skipped=True,
                          summary='Missing requested quantities')
    # Global pre-filters: finite values plus the widest mag (and cz) window
    # spanned by any configured bin; per-bin cuts are applied later.
    filters = [(np.isfinite, c) for c in colnames.values()]
    filters.extend((
        '{} < {}'.format(
            colnames['mag'],
            max(mag_bin['mag_max'] for mag_bin in self.mag_bins)),
        '{} >= {}'.format(
            colnames['mag'],
            min(mag_bin['mag_min'] for mag_bin in self.mag_bins)),
    ))
    if self.need_distance:
        # cz limits are given in km/s; divide by c to get redshift.
        filters.extend((
            '{} < {}'.format(
                colnames['z'],
                max(mag_bin['cz_max'] for mag_bin in self.mag_bins) /
                self._C),
            '{} >= {}'.format(
                colnames['z'],
                min(mag_bin['cz_min'] for mag_bin in self.mag_bins) /
                self._C),
        ))
    catalog_data = catalog_instance.get_quantities(list(colnames.values()),
                                                   filters=filters)
    # Rename columns to the short keys ('z', 'ra', 'dec', 'mag') used below.
    catalog_data = {k: catalog_data[v] for k, v in colnames.items()}
    # create random points over the catalog's healpixel footprint
    rand_ra, rand_dec = generate_uniform_random_ra_dec_footprint(
        catalog_data['ra'].size * self.random_mult,
        get_healpixel_footprint(catalog_data['ra'], catalog_data['dec'],
                                self.random_nside),
        self.random_nside,
    )
    if not self.need_distance:
        # Angular clustering: one random catalog / RR count serves all bins.
        rand_cat = treecorr.Catalog(ra=rand_ra,
                                    dec=rand_dec,
                                    ra_units='deg',
                                    dec_units='deg')
        del rand_ra, rand_dec
        rr = treecorr.NNCorrelation(**self._treecorr_config)
        rr.process(rand_cat)
    fig, ax = plt.subplots()
    try:
        for mag_bin, color in zip(
                self.mag_bins,
                plt.cm.plasma_r(np.linspace(0.1, 1, len(self.mag_bins)))):  #pylint: disable=E1101
            # filter catalog data for this bin
            filters = [
                'mag < {}'.format(mag_bin['mag_max']),
                'mag >= {}'.format(mag_bin['mag_min']),
            ]
            if self.need_distance:
                filters.extend((
                    'z < {}'.format(mag_bin['cz_max'] / self._C),
                    'z >= {}'.format(mag_bin['cz_min'] / self._C),
                ))
            catalog_data_this = GCRQuery(*filters).filter(catalog_data)
            cat = treecorr.Catalog(
                ra=catalog_data_this['ra'],
                dec=catalog_data_this['dec'],
                ra_units='deg',
                dec_units='deg',
                # Comoving distances enable 3D/projected correlations.
                r=(redshift2dist(catalog_data_this['z'],
                                 catalog_instance.cosmology)
                   if self.need_distance else None),
            )
            del catalog_data_this
            treecorr_config = self._treecorr_config.copy()
            if 'pi_max' in mag_bin:
                # Line-of-sight separation limits for wp(rp).
                treecorr_config['min_rpar'] = -mag_bin['pi_max']
                treecorr_config['max_rpar'] = mag_bin['pi_max']
            if self.need_distance:
                # Per-bin random catalog with distances drawn uniformly
                # between the bin's comoving-distance limits.
                rand_cat = treecorr.Catalog(
                    ra=rand_ra,
                    dec=rand_dec,
                    ra_units='deg',
                    dec_units='deg',
                    r=generate_uniform_random_dist(
                        rand_ra.size, *redshift2dist(
                            np.array(
                                [mag_bin['cz_min'], mag_bin['cz_max']]) /
                            self._C, catalog_instance.cosmology)),
                )
                rr = treecorr.NNCorrelation(treecorr_config)
                rr.process(rand_cat)
            # Landy-Szalay pair counts: DD, DR, RD (RR computed above).
            dd = treecorr.NNCorrelation(treecorr_config)
            dr = treecorr.NNCorrelation(treecorr_config)
            rd = treecorr.NNCorrelation(treecorr_config)
            dd.process(cat)
            dr.process(rand_cat, cat)
            rd.process(cat, rand_cat)
            output_filepath = os.path.join(
                output_dir,
                self.output_filename_template.format(mag_bin['mag_min'],
                                                     mag_bin['mag_max']))
            dd.write(output_filepath, rr, dr, rd)
            xi, var_xi = dd.calculateXi(rr, dr, rd)
            xi_rad = np.exp(dd.meanlogr)
            xi_sig = np.sqrt(var_xi)
            # Validation curve for this bin (optionally with error band).
            ax.loglog(self.validation_data[:, 0],
                      self.validation_data[:, mag_bin['data_col']],
                      c=color,
                      label=self.label_template.format(
                          mag_bin['mag_min'], mag_bin['mag_max']))
            if 'data_err_col' in mag_bin:
                y1 = self.validation_data[:, mag_bin[
                    'data_col']] + self.validation_data[:, mag_bin[
                        'data_err_col']]
                y2 = self.validation_data[:, mag_bin[
                    'data_col']] - self.validation_data[:, mag_bin[
                        'data_err_col']]
                # Clamp the band's lower edge so it stays on the log axis.
                y2[y2 <= 0] = self.fig_ylim[0] * 0.9
                ax.fill_between(self.validation_data[:, 0],
                                y1,
                                y2,
                                lw=0,
                                color=color,
                                alpha=0.2)
            # wp integrates over [-pi_max, pi_max], hence the factor of 2.
            scale_wp = mag_bin[
                'pi_max'] * 2.0 if 'pi_max' in mag_bin else 1.0
            ax.errorbar(xi_rad,
                        xi * scale_wp,
                        xi_sig * scale_wp,
                        marker='o',
                        ls='',
                        c=color)
        ax.legend(loc='best')
        ax.set_xlabel(self.fig_xlabel)
        ax.set_ylim(*self.fig_ylim)
        ax.set_ylabel(self.fig_ylabel)
        ax.set_title('{} vs. {}'.format(catalog_name, self.data_label),
                     fontsize='medium')
    finally:
        # Save whatever was plotted even if a bin raised mid-loop.
        fig.savefig(os.path.join(output_dir,
                                 '{:s}.png'.format(self.test_name)),
                    bbox_inches='tight')
        plt.close(fig)
    #TODO: calculate summary statistics
    return TestResult(inspect_only=True)