final['CO'] = hh[1]['CO'][:] hh.close() else: binSizeP = (final['RPMAX'] - final['RPMIN']) / final['NP'] binSizeT = (final['RTMAX'] - 0.) / final['NT'] if not args.do_not_smooth_cov: print('INFO: The covariance will be smoothed') final['CO'] = smooth_cov(final['DA'], final['WE'], final['RP'], final['RT'], delta_r_trans=binSizeT, delta_r_par=binSizeP) else: print('INFO: The covariance will not be smoothed') final['CO'] = compute_cov(final['DA'], final['WE']) ### Test covariance matrix try: scipy.linalg.cholesky(final['CO']) except scipy.linalg.LinAlgError: print('WARNING: Matrix is not positive definite') ### Measurement final['DA'] = (final['DA'] * final['WE']).sum(axis=0) final['WE'] = final['WE'].sum(axis=0) w = final['WE'] > 0. final['DA'][w] /= final['WE'][w] ### Distortion matrix if args.dmat is not None:
def main(cmdargs):
    """Export auto and cross-correlation for the fitter.

    Reads the per-sub-sample correlation from ``--data``, obtains its
    covariance matrix (read from ``--cov``, rescaled from ``--cor`` or
    estimated from the sub-samples), computes the weighted-mean correlation,
    attaches a distortion matrix and writes the result to ``--out`` in two
    HDUs named 'COR' and 'DMATTRI'.

    Args:
        cmdargs: list of str
            Command-line arguments, parsed with argparse.

    Raises:
        SystemExit: with status 1 when a blinded correlation is combined
            with an unblinded distortion matrix, or the other way around.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Export auto and cross-correlation for the fitter.')

    parser.add_argument(
        '--data',
        type=str,
        default=None,
        required=True,
        help='Correlation produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    parser.add_argument(
        '--dmat',
        type=str,
        default=None,
        required=False,
        help=(
            'Distortion matrix produced via picca_dmat.py, picca_xdmat.py... '
            '(if not provided will be identity)'))

    parser.add_argument(
        '--cov',
        type=str,
        default=None,
        required=False,
        help=('Covariance matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--cor',
        type=str,
        default=None,
        required=False,
        help=('Correlation matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--remove-shuffled-correlation',
        type=str,
        default=None,
        required=False,
        help='Remove a correlation from shuffling the distribution of los')

    parser.add_argument('--do-not-smooth-cov',
                        action='store_true',
                        default=False,
                        help='Do not smooth the covariance matrix')

    args = parser.parse_args(cmdargs)

    # Read the correlation; the context manager closes the file even when a
    # column or header key is missing.
    with fitsio.FITS(args.data) as hdul:
        r_par = np.array(hdul[1]['RP'][:])
        r_trans = np.array(hdul[1]['RT'][:])
        z = np.array(hdul[1]['Z'][:])
        num_pairs = np.array(hdul[1]['NB'][:])
        weights = np.array(hdul[2]['WE'][:])

        # A blinded file stores the correlation under 'DA_BLIND'
        if 'DA_BLIND' in hdul[2].get_colnames():
            data_name = 'DA_BLIND'
        else:
            data_name = 'DA'
        xi = np.array(hdul[2][data_name][:])

        head = hdul[1].read_header()

    num_bins_r_par = head['NP']
    num_bins_r_trans = head['NT']
    r_trans_max = head['RTMAX']
    r_par_min = head['RPMIN']
    r_par_max = head['RPMAX']

    if "BLINDING" in head:
        blinding = head["BLINDING"]
    else:
        # older runs are not from DESI main survey and should not be blinded
        blinding = "none"

    if args.remove_shuffled_correlation is not None:
        with fitsio.FITS(args.remove_shuffled_correlation) as hdul:
            xi_shuffled = hdul['COR'][data_name][:]
            weight_shuffled = hdul['COR']['WE'][:]
        xi_shuffled = (xi_shuffled * weight_shuffled).sum(axis=1)
        weight_shuffled = weight_shuffled.sum(axis=1)
        w = weight_shuffled > 0.
        xi_shuffled[w] /= weight_shuffled[w]
        xi -= xi_shuffled[:, None]

    if args.cov is not None:
        userprint(("INFO: The covariance-matrix will be read from file: "
                   "{}").format(args.cov))
        with fitsio.FITS(args.cov) as hdul:
            covariance = hdul[1]['CO'][:]
    elif args.cor is not None:
        userprint(("INFO: The correlation-matrix will be read from file: "
                   "{}").format(args.cor))
        with fitsio.FITS(args.cor) as hdul:
            correlation = hdul[1]['CO'][:]
        if ((correlation.min() < -1.) or (correlation.min() > 1.) or
                (correlation.max() < -1.) or (correlation.max() > 1.) or
                np.any(np.diag(correlation) != 1.)):
            userprint(("WARNING: The correlation-matrix has some incorrect "
                       "values"))
            # Renormalise so that the diagonal is exactly one
            var = np.diagonal(correlation)
            correlation = correlation / np.sqrt(var * var[:, None])
        # Only the variances are taken from the data; the off-diagonal
        # structure comes from the supplied correlation matrix.
        covariance = compute_cov(xi, weights)
        var = np.diagonal(covariance)
        covariance = correlation * np.sqrt(var * var[:, None])
    else:
        delta_r_par = (r_par_max - r_par_min) / num_bins_r_par
        delta_r_trans = (r_trans_max - 0.) / num_bins_r_trans
        if not args.do_not_smooth_cov:
            userprint("INFO: The covariance will be smoothed")
            covariance = smooth_cov(xi,
                                    weights,
                                    r_par,
                                    r_trans,
                                    delta_r_trans=delta_r_trans,
                                    delta_r_par=delta_r_par)
        else:
            userprint("INFO: The covariance will not be smoothed")
            covariance = compute_cov(xi, weights)

    # Weighted mean over the first axis; bins with zero total weight keep
    # the un-normalised sum.
    xi = (xi * weights).sum(axis=0)
    weights = weights.sum(axis=0)
    w = weights > 0
    xi[w] /= weights[w]

    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint("WARNING: Matrix is not positive definite")

    if args.dmat is not None:
        with fitsio.FITS(args.dmat) as hdul:
            dmat_is_blinded = 'DM_BLIND' in hdul[1].get_colnames()
            if data_name == "DA_BLIND" and dmat_is_blinded:
                dmat = np.array(hdul[1]['DM_BLIND'][:])
                dmat_name = 'DM_BLIND'
            elif data_name == "DA_BLIND":
                # The original compared against the misspelled "DA_BlIND",
                # which made this guard unreachable.
                userprint(
                    "Blinded correlations were given but distortion matrix "
                    "is unblinded. These files should not mix. Exiting...")
                sys.exit(1)
            elif dmat_is_blinded:
                userprint(
                    "Non-blinded correlations were given but distortion matrix "
                    "is blinded. These files should not mix. Exiting...")
                sys.exit(1)
            else:
                dmat = hdul[1]['DM'][:]
                dmat_name = 'DM'

            # Fall back to the data coordinates when the distortion-matrix
            # file has no readable model-coordinate HDU.
            try:
                r_par_dmat = hdul[2]['RP'][:]
                r_trans_dmat = hdul[2]['RT'][:]
                z_dmat = hdul[2]['Z'][:]
            except IOError:
                r_par_dmat = r_par.copy()
                r_trans_dmat = r_trans.copy()
                z_dmat = z.copy()

        # A square matrix of the data size uses the data coordinates
        if dmat.shape == (xi.size, xi.size):
            r_par_dmat = r_par.copy()
            r_trans_dmat = r_trans.copy()
            z_dmat = z.copy()
    else:
        dmat = np.eye(len(xi))
        r_par_dmat = r_par.copy()
        r_trans_dmat = r_trans.copy()
        z_dmat = z.copy()
        # Without this the write below failed with a NameError
        dmat_name = 'DM_EMPTY'

    header = [
        {
            'name': "BLINDING",
            'value': blinding,
            'comment': 'String specifying the blinding strategy'
        },
        {
            'name': 'RPMIN',
            'value': r_par_min,
            'comment': 'Minimum r-parallel'
        },
        {
            'name': 'RPMAX',
            'value': r_par_max,
            'comment': 'Maximum r-parallel'
        },
        {
            'name': 'RTMAX',
            'value': r_trans_max,
            'comment': 'Maximum r-transverse'
        },
        {
            'name': 'NP',
            'value': num_bins_r_par,
            'comment': 'Number of bins in r-parallel'
        },
        {
            'name': 'NT',
            'value': num_bins_r_trans,
            'comment': 'Number of bins in r-transverse'
        },
        {
            'name': 'OMEGAM',
            'value': head['OMEGAM'],
            'comment': 'Omega_matter(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAR',
            'value': head['OMEGAR'],
            'comment': 'Omega_radiation(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAK',
            'value': head['OMEGAK'],
            'comment': 'Omega_k(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'WL',
            'value': head['WL'],
            'comment':
                'Equation of state of dark energy of fiducial LambdaCDM cosmology'
        },
    ]
    comment = [
        'R-parallel', 'R-transverse', 'Redshift', 'Correlation',
        'Covariance matrix', 'Distortion matrix', 'Number of pairs'
    ]

    with fitsio.FITS(args.out, 'rw', clobber=True) as results:
        results.write([xi, r_par, r_trans, z, covariance, dmat, num_pairs],
                      names=[data_name, 'RP', 'RT', 'Z', 'CO', dmat_name, 'NB'],
                      comment=comment,
                      header=header,
                      extname='COR')
        comment = ['R-parallel model', 'R-transverse model', 'Redshift model']
        results.write([r_par_dmat, r_trans_dmat, z_dmat],
                      names=['DMRP', 'DMRT', 'DMZ'],
                      comment=comment,
                      extname='DMATTRI')
axis=0) data[key]['HEALPID'] = np.append(data[key]['HEALPID'], new_healpix) # Sort the data by the healpix values for key in sorted(list(data.keys())): sort = np.array(data[key]['HEALPID']).argsort() data[key]['DA'] = data[key]['DA'][sort] data[key]['WE'] = data[key]['WE'][sort] data[key]['HEALPID'] = data[key]['HEALPID'][sort] # Append the data xi = np.append(data[0]['DA'], data[1]['DA'], axis=1) weights = np.append(data[0]['WE'], data[1]['WE'], axis=1) # Compute the covariance covariance = compute_cov(xi, weights) # Get the cross-covariance num_bins = data[0]['DA'].shape[1] cross_covariance = covariance.copy() cross_covariance = cross_covariance[:, num_bins:] cross_covariance = cross_covariance[:num_bins, :] ### Get the cross-correlation var = np.diagonal(covariance) cor = covariance / np.sqrt(var * var[:, None]) cross_correlation = cor.copy() cross_correlation = cross_correlation[:, num_bins:] cross_correlation = cross_correlation[:num_bins, :] ### Test if valid
def main():
    # pylint: disable-msg=too-many-locals,too-many-branches,too-many-statements
    """Exports auto and cross-correlation of catalog of objects for the fitter.

    Reads the four pair-count files of either the auto-correlation
    (DD, RR, DR, RD) or the cross-correlation (xDD, xRR, xD1R2, xR1D2),
    combines them as (DD + RR - DR - RD) / RR, obtains a covariance matrix
    (from file, from Poisson statistics or from sub-sampling) and writes the
    result to ``--out``.

    Raises:
        SystemExit: with status 1 when the command-line options are
            inconsistent, a required file is not given, or the input files
            disagree on NSIDE or HLPXSCHM.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=('Export auto and cross-correlation of catalog of objects '
                     'for the fitter.'))

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    parser.add_argument('--DD-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data x data auto-correlation')

    parser.add_argument('--RR-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the random x random auto-correlation')

    parser.add_argument('--DR-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data x random auto-correlation')

    parser.add_argument('--RD-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the random x data auto-correlation')

    parser.add_argument('--xDD-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data_1 x data_2 cross-correlation')

    parser.add_argument(
        '--xRR-file',
        type=str,
        default=None,
        required=False,
        help='File of the random_1 x random_2 cross-correlation')

    parser.add_argument('--xD1R2-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the data_1 x random_2 cross-correlation')

    parser.add_argument('--xR1D2-file',
                        type=str,
                        default=None,
                        required=False,
                        help='File of the random_1 x data_2 cross-correlation')

    parser.add_argument(
        '--do-not-smooth-cov',
        action='store_true',
        default=False,
        help='Do not smooth the covariance matrix from sub-sampling')

    parser.add_argument('--get-cov-from-poisson',
                        action='store_true',
                        default=False,
                        help='Get covariance matrix from Poisson statistics')

    parser.add_argument(
        '--cov',
        type=str,
        default=None,
        required=False,
        help=('Path to a covariance matrix file (if not provided it will be '
              'calculated by subsampling or from Poisson statistics)'))

    args = parser.parse_args()

    ### Auto or cross correlation?
    no_data = args.DD_file is None and args.xDD_file is None
    both_data = args.DD_file is not None and args.xDD_file is not None
    # Two covariance methods are requested only when BOTH options are given.
    # The original test was inverted: it rejected '--cov' on its own.
    two_cov_methods = args.cov is not None and args.get_cov_from_poisson
    if no_data or both_data or two_cov_methods:
        userprint(('ERROR: No data files, or both auto and cross data files, '
                   'or two different method for covariance'))
        sys.exit(1)

    if args.DD_file is not None:
        corr = 'AUTO'
        correlation_files = {
            'DD': args.DD_file,
            'RR': args.RR_file,
            'DR': args.DR_file,
            'RD': args.RD_file
        }
    else:
        # TODO: Test if picca_co.py and export_co.py work for cross
        corr = 'CROSS'
        correlation_files = {
            'xDD': args.xDD_file,
            'xRR': args.xRR_file,
            'xD1R2': args.xD1R2_file,
            'xR1D2': args.xR1D2_file
        }

    # All four files are used below; fail early with a readable message
    # instead of passing None to fitsio.
    missing = [
        type_corr for type_corr, filename in correlation_files.items()
        if filename is None
    ]
    if missing:
        userprint('ERROR: Missing correlation file(s) for: {}'.format(
            ', '.join(missing)))
        sys.exit(1)

    # Read files
    data = {}
    for type_corr, filename in correlation_files.items():
        with fitsio.FITS(filename) as hdul:
            header = hdul[1].read_header()
            # The fiducial cosmology written out is the one of the last
            # file read.
            fid_Om = header['OMEGAM']
            fid_Or = header['OMEGAR']
            fid_Ok = header['OMEGAK']
            fid_wl = header['WL']

            # Normalisation of the pair counts
            num_objects = header['NOBJ']
            if type_corr in ['DD', 'RR']:
                coef = num_objects * (num_objects - 1)
            else:
                coef = num_objects * header['NOBJ2']

            # The binning is taken from the data x data file
            if type_corr in ['DD', 'xDD']:
                data['COEF'] = coef
                for item in ['NT', 'NP', 'RTMAX', 'RPMIN', 'RPMAX']:
                    data[item] = header[item]
                for item in ['RP', 'RT', 'Z', 'NB']:
                    data[item] = np.array(hdul[1][item][:])

            sub_weights = np.array(hdul[2]['WE'][:])
            w = sub_weights.sum(axis=1) > 0.
            if w.sum() != w.size:
                userprint("INFO: {} sub-samples were empty".format(w.size -
                                                                   w.sum()))
            data[type_corr] = {
                'NSIDE': header['NSIDE'],
                'HLPXSCHM': hdul[2].read_header()['HLPXSCHM'],
                'HEALPID': hdul[2]['HEALPID'][:][w],
                'WE': sub_weights[w] / coef,
            }

    # Keys of the four pair-count sets for the selected correlation type
    if corr == 'AUTO':
        key_dd, key_rr = 'DD', 'RR'
    else:
        key_dd, key_rr = 'xDD', 'xRR'

    # Compute correlation
    xi_data_data = data[key_dd]['WE'].sum(axis=0)
    xi_random_random = data[key_rr]['WE'].sum(axis=0)
    if corr == 'AUTO':
        xi = _correlation_from_counts(xi_data_data, xi_random_random,
                                      data['RD']['WE'].sum(axis=0),
                                      data['DR']['WE'].sum(axis=0))
    else:
        xi = _correlation_from_counts(xi_data_data, xi_random_random,
                                      data['xD1R2']['WE'].sum(axis=0),
                                      data['xR1D2']['WE'].sum(axis=0))
    data['DA'] = xi
    data['corr_DD'] = xi_data_data
    data['corr_RR'] = xi_random_random

    # Compute covariance matrix
    if args.cov is not None:
        userprint('INFO: Read covariance from file')
        with fitsio.FITS(args.cov) as hdul:
            data['CO'] = hdul[1]['CO'][:]
    elif args.get_cov_from_poisson:
        userprint('INFO: Compute covariance from Poisson statistics')
        w = data['corr_RR'] > 0.
        covariance = np.zeros(data['corr_DD'].size)
        covariance[w] = ((data['COEF'] / 2. * data['corr_DD'][w])**2 /
                         (data['COEF'] / 2. * data['corr_RR'][w])**3)
        data['CO'] = np.diag(covariance)
    else:
        userprint('INFO: Compute covariance from sub-sampling')

        ### To have same number of HEALPix
        for type_corr1 in list(correlation_files):
            for type_corr2 in list(correlation_files):

                if data[type_corr1]['NSIDE'] != data[type_corr2]['NSIDE']:
                    userprint("ERROR: NSIDE are different: {} != "
                              "{}".format(data[type_corr1]['NSIDE'],
                                          data[type_corr2]['NSIDE']))
                    sys.exit(1)
                if data[type_corr1]['HLPXSCHM'] != data[type_corr2]['HLPXSCHM']:
                    userprint("ERROR: HLPXSCHM are different: {} != "
                              "{}".format(data[type_corr1]['HLPXSCHM'],
                                          data[type_corr2]['HLPXSCHM']))
                    sys.exit(1)

                # Pixels present in the first set but not in the second are
                # added to the second with zero weight.
                w = np.logical_not(
                    np.in1d(data[type_corr1]['HEALPID'],
                            data[type_corr2]['HEALPID']))
                if w.sum() != 0:
                    userprint("WARNING: HEALPID are different by {} for {}:{} "
                              "and {}:{}".format(
                                  w.sum(), type_corr1,
                                  data[type_corr1]['HEALPID'].size, type_corr2,
                                  data[type_corr2]['HEALPID'].size))
                    new_healpix = data[type_corr1]['HEALPID'][w]
                    num_new_healpix = new_healpix.size
                    num_bins = data[type_corr2]['WE'].shape[1]
                    data[type_corr2]['HEALPID'] = np.append(
                        data[type_corr2]['HEALPID'], new_healpix)
                    data[type_corr2]['WE'] = np.append(data[type_corr2]['WE'],
                                                       np.zeros(
                                                           (num_new_healpix,
                                                            num_bins)),
                                                       axis=0)

        # Sort the data by the healpix values
        for type_corr1 in list(correlation_files):
            sort = np.array(data[type_corr1]['HEALPID']).argsort()
            data[type_corr1]['WE'] = data[type_corr1]['WE'][sort]
            data[type_corr1]['HEALPID'] = data[type_corr1]['HEALPID'][sort]

        # Per-sub-sample correlation, weighted by the data x data counts
        if corr == 'AUTO':
            xi = _correlation_from_counts(data['DD']['WE'], data['RR']['WE'],
                                          data['DR']['WE'], data['RD']['WE'])
        else:
            xi = _correlation_from_counts(data['xDD']['WE'], data['xRR']['WE'],
                                          data['xD1R2']['WE'],
                                          data['xR1D2']['WE'])
        weights = data[key_dd]['WE']
        data['HLP_DA'] = xi
        data['HLP_WE'] = weights

        if args.do_not_smooth_cov:
            userprint('INFO: The covariance will not be smoothed')
            covariance = compute_cov(xi, weights)
        else:
            userprint('INFO: The covariance will be smoothed')
            delta_r_par = (data['RPMAX'] - data['RPMIN']) / data['NP']
            delta_r_trans = (data['RTMAX'] - 0.) / data['NT']
            covariance = smooth_cov(xi,
                                    weights,
                                    data['RP'],
                                    data['RT'],
                                    delta_r_par=delta_r_par,
                                    delta_r_trans=delta_r_trans)
        data['CO'] = covariance

    try:
        scipy.linalg.cholesky(data['CO'])
    except scipy.linalg.LinAlgError:
        userprint('WARNING: Matrix is not positive definite')

    # Identity distortion matrix
    data['DM'] = np.eye(data['DA'].size)

    # Save results
    header = [{
        'name': 'RPMIN',
        'value': data['RPMIN'],
        'comment': 'Minimum r-parallel'
    }, {
        'name': 'RPMAX',
        'value': data['RPMAX'],
        'comment': 'Maximum r-parallel'
    }, {
        'name': 'RTMAX',
        'value': data['RTMAX'],
        'comment': 'Maximum r-transverse'
    }, {
        'name': 'NP',
        'value': data['NP'],
        'comment': 'Number of bins in r-parallel'
    }, {
        'name': 'NT',
        'value': data['NT'],
        'comment': 'Number of bins in r-transverse'
    }, {
        'name': 'NSIDE',
        'value': data[key_dd]['NSIDE'],
        'comment': 'Healpix nside'
    }, {
        'name': 'OMEGAM',
        'value': fid_Om,
        'comment': 'Omega_matter(z=0) of fiducial LambdaCDM cosmology'
    }, {
        'name': 'OMEGAR',
        'value': fid_Or,
        'comment': 'Omega_radiation(z=0) of fiducial LambdaCDM cosmology'
    }, {
        'name': 'OMEGAK',
        'value': fid_Ok,
        'comment': 'Omega_k(z=0) of fiducial LambdaCDM cosmology'
    }, {
        'name': 'WL',
        'value': fid_wl,
        'comment':
            'Equation of state of dark energy of fiducial LambdaCDM cosmology'
    }]
    names = ['RP', 'RT', 'Z', 'DA', 'CO', 'DM', 'NB']
    comment = [
        'R-parallel', 'R-transverse', 'Redshift', 'Correlation',
        'Covariance matrix', 'Distortion matrix', 'Number of pairs'
    ]

    with fitsio.FITS(args.out, 'rw', clobber=True) as results:
        results.write([data[name] for name in names],
                      names=names,
                      header=header,
                      comment=comment,
                      extname='COR')

        # The per-sub-sample HDU only exists when sub-sampling was used
        if args.cov is None and not args.get_cov_from_poisson:
            header2 = [{
                'name': 'HLPXSCHM',
                'value': data[key_dd]['HLPXSCHM'],
                'comment': 'healpix scheme'
            }]
            comment = ['Healpix index', 'Sum of weight', 'Correlation']
            results.write(
                [data[key_dd]['HEALPID'], data['HLP_WE'], data['HLP_DA']],
                names=['HEALPID', 'WE', 'DA'],
                header=header2,
                comment=comment,
                extname='SUB_COR')


def _correlation_from_counts(counts_dd, counts_rr, counts_first,
                             counts_second):
    """Return (DD + RR - first - second) / RR where RR > 0, zero elsewhere.

    The two cross terms are subtracted in the order given so that callers
    control the floating-point evaluation order.
    """
    xi = np.zeros(counts_dd.shape)
    w = counts_rr > 0.
    xi[w] = (counts_dd[w] + counts_rr[w] - counts_first[w] -
             counts_second[w]) / counts_rr[w]
    return xi
def main():
    """Export auto and cross-correlation for the fitter.

    Reads the per-sub-sample correlation from ``--data``, obtains its
    covariance matrix (read from ``--cov``, rescaled from ``--cor`` or
    estimated from the sub-samples), computes the weighted-mean correlation,
    attaches a distortion matrix and writes the result to ``--out`` in two
    HDUs named 'COR' and 'DMATTRI'. Arguments are taken from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Export auto and cross-correlation for the fitter.')

    parser.add_argument(
        '--data',
        type=str,
        default=None,
        required=True,
        help='Correlation produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    parser.add_argument(
        '--dmat',
        type=str,
        default=None,
        required=False,
        help=(
            'Distortion matrix produced via picca_dmat.py, picca_xdmat.py... '
            '(if not provided will be identity)'))

    parser.add_argument(
        '--cov',
        type=str,
        default=None,
        required=False,
        help=('Covariance matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--cor',
        type=str,
        default=None,
        required=False,
        help=('Correlation matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--remove-shuffled-correlation',
        type=str,
        default=None,
        required=False,
        help='Remove a correlation from shuffling the distribution of los')

    parser.add_argument('--do-not-smooth-cov',
                        action='store_true',
                        default=False,
                        help='Do not smooth the covariance matrix')

    args = parser.parse_args()

    # Context managers make sure every FITS handle is released even when a
    # column or header key is missing.
    with fitsio.FITS(args.data) as hdul:
        r_par = np.array(hdul[1]['RP'][:])
        r_trans = np.array(hdul[1]['RT'][:])
        z = np.array(hdul[1]['Z'][:])
        num_pairs = np.array(hdul[1]['NB'][:])
        xi = np.array(hdul[2]['DA'][:])
        weights = np.array(hdul[2]['WE'][:])
        head = hdul[1].read_header()

    num_bins_r_par = head['NP']
    num_bins_r_trans = head['NT']
    r_trans_max = head['RTMAX']
    r_par_min = head['RPMIN']
    r_par_max = head['RPMAX']

    if args.remove_shuffled_correlation is not None:
        with fitsio.FITS(args.remove_shuffled_correlation) as hdul:
            xi_shuffled = hdul['COR']['DA'][:]
            weight_shuffled = hdul['COR']['WE'][:]
        xi_shuffled = (xi_shuffled * weight_shuffled).sum(axis=1)
        weight_shuffled = weight_shuffled.sum(axis=1)
        w = weight_shuffled > 0.
        xi_shuffled[w] /= weight_shuffled[w]
        xi -= xi_shuffled[:, None]

    if args.cov is not None:
        userprint(("INFO: The covariance-matrix will be read from file: "
                   "{}").format(args.cov))
        with fitsio.FITS(args.cov) as hdul:
            covariance = hdul[1]['CO'][:]
    elif args.cor is not None:
        userprint(("INFO: The correlation-matrix will be read from file: "
                   "{}").format(args.cor))
        with fitsio.FITS(args.cor) as hdul:
            correlation = hdul[1]['CO'][:]
        if ((correlation.min() < -1.) or (correlation.min() > 1.) or
                (correlation.max() < -1.) or (correlation.max() > 1.) or
                np.any(np.diag(correlation) != 1.)):
            userprint(("WARNING: The correlation-matrix has some incorrect "
                       "values"))
            # Renormalise so that the diagonal is exactly one
            var = np.diagonal(correlation)
            correlation = correlation / np.sqrt(var * var[:, None])
        # Only the variances are taken from the data; the off-diagonal
        # structure comes from the supplied correlation matrix.
        covariance = compute_cov(xi, weights)
        var = np.diagonal(covariance)
        covariance = correlation * np.sqrt(var * var[:, None])
    else:
        delta_r_par = (r_par_max - r_par_min) / num_bins_r_par
        delta_r_trans = (r_trans_max - 0.) / num_bins_r_trans
        if not args.do_not_smooth_cov:
            userprint("INFO: The covariance will be smoothed")
            covariance = smooth_cov(xi,
                                    weights,
                                    r_par,
                                    r_trans,
                                    delta_r_trans=delta_r_trans,
                                    delta_r_par=delta_r_par)
        else:
            userprint("INFO: The covariance will not be smoothed")
            covariance = compute_cov(xi, weights)

    # Weighted mean over the first axis; bins with zero total weight keep
    # the un-normalised sum.
    xi = (xi * weights).sum(axis=0)
    weights = weights.sum(axis=0)
    w = weights > 0
    xi[w] /= weights[w]

    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint("WARNING: Matrix is not positive definite")

    if args.dmat is not None:
        with fitsio.FITS(args.dmat) as hdul:
            dmat = hdul[1]['DM'][:]
            # Fall back to the data coordinates when the distortion-matrix
            # file has no readable model-coordinate HDU.
            try:
                r_par_dmat = hdul[2]['RP'][:]
                r_trans_dmat = hdul[2]['RT'][:]
                z_dmat = hdul[2]['Z'][:]
            except IOError:
                r_par_dmat = r_par.copy()
                r_trans_dmat = r_trans.copy()
                z_dmat = z.copy()
        # A square matrix of the data size uses the data coordinates
        if dmat.shape == (xi.size, xi.size):
            r_par_dmat = r_par.copy()
            r_trans_dmat = r_trans.copy()
            z_dmat = z.copy()
    else:
        dmat = np.eye(len(xi))
        r_par_dmat = r_par.copy()
        r_trans_dmat = r_trans.copy()
        z_dmat = z.copy()

    header = [{
        'name': 'RPMIN',
        'value': r_par_min,
        'comment': 'Minimum r-parallel'
    }, {
        'name': 'RPMAX',
        'value': r_par_max,
        'comment': 'Maximum r-parallel'
    }, {
        'name': 'RTMAX',
        'value': r_trans_max,
        'comment': 'Maximum r-transverse'
    }, {
        'name': 'NP',
        'value': num_bins_r_par,
        'comment': 'Number of bins in r-parallel'
    }, {
        'name': 'NT',
        'value': num_bins_r_trans,
        'comment': 'Number of bins in r-transverse'
    }]
    comment = [
        'R-parallel', 'R-transverse', 'Redshift', 'Correlation',
        'Covariance matrix', 'Distortion matrix', 'Number of pairs'
    ]

    with fitsio.FITS(args.out, 'rw', clobber=True) as results:
        results.write([r_par, r_trans, z, xi, covariance, dmat, num_pairs],
                      names=['RP', 'RT', 'Z', 'DA', 'CO', 'DM', 'NB'],
                      comment=comment,
                      header=header,
                      extname='COR')
        comment = ['R-parallel model', 'R-transverse model', 'Redshift model']
        results.write([r_par_dmat, r_trans_dmat, z_dmat],
                      names=['DMRP', 'DMRT', 'DMZ'],
                      comment=comment,
                      extname='DMATTRI')
def main(cmdargs):
    """Export auto and cross-correlation for the fitter.

    Reads the per-sub-sample correlation from ``--data``, obtains its
    covariance matrix (read from ``--cov``, rescaled from ``--cor`` or
    estimated from the sub-samples), computes the weighted-mean correlation,
    attaches a distortion matrix, adds the blinding template when the input
    is flagged as blinded, and writes the result to ``--out`` in two HDUs
    named 'COR' and 'DMATTRI'.

    Args:
        cmdargs: list of str
            Command-line arguments, parsed with argparse.

    Raises:
        SystemExit: with status 1 when a blinded correlation is combined
            with an unblinded distortion matrix, or the other way around.
        ValueError: if blinding is required but the strategy is not
            'corr_yshift' (or 'minimal'), or --blind-corr-type is missing.
        RuntimeError: if the blinding file is not found or the blinding
            template does not match the shape of the correlation.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Export auto and cross-correlation for the fitter.')

    parser.add_argument(
        '--data',
        type=str,
        default=None,
        required=True,
        help='Correlation produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    parser.add_argument(
        '--dmat',
        type=str,
        default=None,
        required=False,
        help=(
            'Distortion matrix produced via picca_dmat.py, picca_xdmat.py... '
            '(if not provided will be identity)'))

    parser.add_argument(
        '--cov',
        type=str,
        default=None,
        required=False,
        help=('Covariance matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--cor',
        type=str,
        default=None,
        required=False,
        help=('Correlation matrix (if not provided will be calculated by '
              'subsampling)'))

    parser.add_argument(
        '--remove-shuffled-correlation',
        type=str,
        default=None,
        required=False,
        help='Remove a correlation from shuffling the distribution of los')

    parser.add_argument('--do-not-smooth-cov',
                        action='store_true',
                        default=False,
                        help='Do not smooth the covariance matrix')

    parser.add_argument(
        '--blind-corr-type',
        default=None,
        choices=['lyaxlya', 'lyaxlyb', 'qsoxlya', 'qsoxlyb'],
        help='Type of correlation. Required to apply blinding in DESI')

    args = parser.parse_args(cmdargs)

    # Read the correlation; the context manager closes the file even when a
    # column or header key is missing.
    with fitsio.FITS(args.data) as hdul:
        r_par = np.array(hdul[1]['RP'][:])
        r_trans = np.array(hdul[1]['RT'][:])
        z = np.array(hdul[1]['Z'][:])
        num_pairs = np.array(hdul[1]['NB'][:])
        weights = np.array(hdul[2]['WE'][:])

        # A blinded file stores the correlation under 'DA_BLIND'
        if 'DA_BLIND' in hdul[2].get_colnames():
            data_name = 'DA_BLIND'
        else:
            data_name = 'DA'
        xi = np.array(hdul[2][data_name][:])

        head = hdul[1].read_header()

    num_bins_r_par = head['NP']
    num_bins_r_trans = head['NT']
    r_trans_max = head['RTMAX']
    r_par_min = head['RPMIN']
    r_par_max = head['RPMAX']

    if "BLINDING" in head:
        blinding = head["BLINDING"]
        if blinding == 'minimal':
            blinding = 'corr_yshift'
            userprint("The minimal strategy is no longer supported. "
                      "Automatically switch to corr_yshift.")
    else:
        # if BLINDING keyword not present (old file), ignore blinding
        blinding = "none"

    if args.remove_shuffled_correlation is not None:
        with fitsio.FITS(args.remove_shuffled_correlation) as hdul:
            xi_shuffled = hdul['COR'][data_name][:]
            weight_shuffled = hdul['COR']['WE'][:]
        xi_shuffled = (xi_shuffled * weight_shuffled).sum(axis=1)
        weight_shuffled = weight_shuffled.sum(axis=1)
        w = weight_shuffled > 0.
        xi_shuffled[w] /= weight_shuffled[w]
        xi -= xi_shuffled[:, None]

    if args.cov is not None:
        userprint(("INFO: The covariance-matrix will be read from file: "
                   "{}").format(args.cov))
        with fitsio.FITS(args.cov) as hdul:
            covariance = hdul[1]['CO'][:]
    elif args.cor is not None:
        userprint(("INFO: The correlation-matrix will be read from file: "
                   "{}").format(args.cor))
        with fitsio.FITS(args.cor) as hdul:
            correlation = hdul[1]['CO'][:]
        if ((correlation.min() < -1.) or (correlation.min() > 1.) or
                (correlation.max() < -1.) or (correlation.max() > 1.) or
                np.any(np.diag(correlation) != 1.)):
            userprint(("WARNING: The correlation-matrix has some incorrect "
                       "values"))
            # Renormalise so that the diagonal is exactly one
            var = np.diagonal(correlation)
            correlation = correlation / np.sqrt(var * var[:, None])
        # Only the variances are taken from the data; the off-diagonal
        # structure comes from the supplied correlation matrix.
        covariance = compute_cov(xi, weights)
        var = np.diagonal(covariance)
        covariance = correlation * np.sqrt(var * var[:, None])
    else:
        delta_r_par = (r_par_max - r_par_min) / num_bins_r_par
        delta_r_trans = (r_trans_max - 0.) / num_bins_r_trans
        if not args.do_not_smooth_cov:
            userprint("INFO: The covariance will be smoothed")
            covariance = smooth_cov(xi,
                                    weights,
                                    r_par,
                                    r_trans,
                                    delta_r_trans=delta_r_trans,
                                    delta_r_par=delta_r_par)
        else:
            userprint("INFO: The covariance will not be smoothed")
            covariance = compute_cov(xi, weights)

    # Weighted mean over the first axis; bins with zero total weight keep
    # the un-normalised sum.
    xi = (xi * weights).sum(axis=0)
    weights = weights.sum(axis=0)
    w = weights > 0
    xi[w] /= weights[w]

    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint("WARNING: Matrix is not positive definite")

    if args.dmat is not None:
        with fitsio.FITS(args.dmat) as hdul:
            dmat_is_blinded = 'DM_BLIND' in hdul[1].get_colnames()
            if data_name == "DA_BLIND" and dmat_is_blinded:
                dmat = np.array(hdul[1]['DM_BLIND'][:])
                dmat_name = 'DM_BLIND'
            elif data_name == "DA_BLIND":
                # The original compared against the misspelled "DA_BlIND",
                # which made this guard unreachable.
                userprint(
                    "Blinded correlations were given but distortion matrix "
                    "is unblinded. These files should not mix. Exiting...")
                sys.exit(1)
            elif dmat_is_blinded:
                userprint(
                    "Non-blinded correlations were given but distortion matrix "
                    "is blinded. These files should not mix. Exiting...")
                sys.exit(1)
            else:
                dmat = hdul[1]['DM'][:]
                dmat_name = 'DM'

            # Fall back to the data coordinates when the distortion-matrix
            # file has no readable model-coordinate HDU.
            try:
                r_par_dmat = hdul[2]['RP'][:]
                r_trans_dmat = hdul[2]['RT'][:]
                z_dmat = hdul[2]['Z'][:]
            except IOError:
                r_par_dmat = r_par.copy()
                r_trans_dmat = r_trans.copy()
                z_dmat = z.copy()

        # A square matrix of the data size uses the data coordinates
        if dmat.shape == (xi.size, xi.size):
            r_par_dmat = r_par.copy()
            r_trans_dmat = r_trans.copy()
            z_dmat = z.copy()
    else:
        dmat = np.eye(len(xi))
        r_par_dmat = r_par.copy()
        r_trans_dmat = r_trans.copy()
        z_dmat = z.copy()
        dmat_name = 'DM_EMPTY'

    header = [
        {
            'name': "BLINDING",
            'value': blinding,
            'comment': 'String specifying the blinding strategy'
        },
        {
            'name': 'RPMIN',
            'value': r_par_min,
            'comment': 'Minimum r-parallel'
        },
        {
            'name': 'RPMAX',
            'value': r_par_max,
            'comment': 'Maximum r-parallel'
        },
        {
            'name': 'RTMAX',
            'value': r_trans_max,
            'comment': 'Maximum r-transverse'
        },
        {
            'name': 'NP',
            'value': num_bins_r_par,
            'comment': 'Number of bins in r-parallel'
        },
        {
            'name': 'NT',
            'value': num_bins_r_trans,
            'comment': 'Number of bins in r-transverse'
        },
        {
            'name': 'OMEGAM',
            'value': head['OMEGAM'],
            'comment': 'Omega_matter(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAR',
            'value': head['OMEGAR'],
            'comment': 'Omega_radiation(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'OMEGAK',
            'value': head['OMEGAK'],
            'comment': 'Omega_k(z=0) of fiducial LambdaCDM cosmology'
        },
        {
            'name': 'WL',
            'value': head['WL'],
            'comment':
                'Equation of state of dark energy of fiducial LambdaCDM cosmology'
        },
    ]
    comment = [
        'R-parallel', 'R-transverse', 'Redshift', 'Correlation',
        'Covariance matrix', 'Distortion matrix', 'Number of pairs'
    ]

    # Check if we need blinding and apply it
    if 'BLIND' in data_name or blinding != 'none':
        if blinding == 'corr_yshift':
            userprint("Blinding using strategy corr_yshift.")
        else:
            raise ValueError(
                "Expected blinding to be 'corr_yshift' or 'minimal'."
                " Found {}.".format(blinding))

        if args.blind_corr_type is None:
            raise ValueError("Blinding strategy 'corr_yshift' requires"
                             " argument --blind-corr-type.")

        # Check type of correlation and get size and regular binning
        if args.blind_corr_type in ['lyaxlya', 'lyaxlyb']:
            corr_size = 2500
            rp_interp_grid = np.arange(2., 202., 4)
            rt_interp_grid = np.arange(2., 202., 4)
        elif args.blind_corr_type in ['qsoxlya', 'qsoxlyb']:
            corr_size = 5000
            rp_interp_grid = np.arange(-197.99, 202.01, 4)
            rt_interp_grid = np.arange(2., 202, 4)
        else:
            raise ValueError("Unknown correlation type: {}".format(
                args.blind_corr_type))

        uses_standard_grid = corr_size == len(xi)
        if uses_standard_grid:
            # Read the blinding file and get the right template
            blinding_filename = (
                '/global/cfs/projectdirs/desi/science/lya/y1-kp6/'
                'blinding/y1_blinding_v1.2_standard_29_03_2022.h5')
        else:
            # Read the regular grid blinding file and get the right template
            blinding_filename = (
                '/global/cfs/projectdirs/desi/science/lya/y1-kp6/'
                'blinding/y1_blinding_v1.2_regular_grid_29_03_2022.h5')

        if not os.path.isfile(blinding_filename):
            raise RuntimeError(
                "Missing blinding file. Make sure you are running at"
                " NERSC or contact picca developers")

        # np.array copies the dataset, so the file can be closed right away
        with h5py.File(blinding_filename, 'r') as blinding_file:
            hex_diff = np.array(
                blinding_file['blinding'][args.blind_corr_type]).astype(str)
        # The template values are stored as hexadecimal float strings
        diff_grid = np.array([float.fromhex(x) for x in hex_diff])

        if uses_standard_grid:
            diff = diff_grid
        else:
            # Interpolate the blinding template on the regular grid
            interp = scipy.interpolate.RectBivariateSpline(
                rp_interp_grid,
                rt_interp_grid,
                diff_grid.reshape(len(rp_interp_grid), len(rt_interp_grid)),
                kx=3,
                ky=3)
            diff = interp.ev(r_par, r_trans)

        # Check that the shapes match
        if np.shape(xi) != np.shape(diff):
            raise RuntimeError(
                "Unknown binning or wrong correlation type. Cannot blind."
                " Please raise an issue or contact picca developers.")

        # Add blinding
        xi = xi + diff

    # The output is only created once blinding has succeeded, so a failed
    # run no longer leaves behind an empty, clobbered file.
    with fitsio.FITS(args.out, 'rw', clobber=True) as results:
        results.write([xi, r_par, r_trans, z, covariance, dmat, num_pairs],
                      names=[data_name, 'RP', 'RT', 'Z', 'CO', dmat_name, 'NB'],
                      comment=comment,
                      header=header,
                      extname='COR')
        comment = ['R-parallel model', 'R-transverse model', 'Redshift model']
        results.write([r_par_dmat, r_trans_dmat, z_dmat],
                      names=['DMRP', 'DMRT', 'DMZ'],
                      comment=comment,
                      extname='DMATTRI')
def main(cmdargs):
    """Compute the cross-covariance matrix between two correlations.

    Reads the per-healpix correlations and weights of ``--data1`` and
    ``--data2``, pads each with zero-weight rows for the healpix pixels only
    present in the other, computes the covariance of the concatenated data
    and writes the cross block of the covariance and of the correlation
    matrix to ``--out`` (HDU 'COVAR', columns 'CO' and 'COR').

    Args:
        cmdargs: list of str
            Command-line arguments, parsed with argparse.

    Raises:
        SystemExit: with status 1 when the two files disagree on NSIDE or
            HLPXSCHM.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=('Compute the cross-covariance matrix between two '
                     'correlations'))

    parser.add_argument(
        '--data1',
        type=str,
        default=None,
        required=True,
        help='Correlation 1 produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument(
        '--data2',
        type=str,
        default=None,
        required=True,
        help='Correlation 2 produced via picca_cf.py, picca_xcf.py, ...')

    parser.add_argument('--out',
                        type=str,
                        default=None,
                        required=True,
                        help='Output file name')

    args = parser.parse_args(cmdargs)

    data = {}

    # Read data
    for index, filename in enumerate([args.data1, args.data2]):
        with fitsio.FITS(filename) as hdul:
            nside = hdul[1].read_header()['NSIDE']
            healpix_scheme = hdul[2].read_header()['HLPXSCHM']
            weights = np.array(hdul[2]['WE'][:])
            healpix_list = np.array(hdul[2]['HEALPID'][:])

            # A blinded file stores the correlation under 'DA_BLIND'
            if 'DA_BLIND' in hdul[2].get_colnames():
                xi = np.array(hdul[2]['DA_BLIND'][:])
            else:
                xi = np.array(hdul[2]['DA'][:])

        data[index] = {
            'DA': xi,
            'WE': weights,
            'HEALPID': healpix_list,
            'NSIDE': nside,
            'HLPXSCHM': healpix_scheme
        }

    # exit if NSIDE1 != NSIDE2 (non-zero status: this is an error)
    if data[0]['NSIDE'] != data[1]['NSIDE']:
        userprint(("ERROR: NSIDE are different: {} != "
                   "{}").format(data[0]['NSIDE'], data[1]['NSIDE']))
        sys.exit(1)
    # exit if HLPXSCHM1 != HLPXSCHM2
    if data[0]['HLPXSCHM'] != data[1]['HLPXSCHM']:
        userprint(("ERROR: HLPXSCHM are different: {} != "
                   "{}").format(data[0]['HLPXSCHM'], data[1]['HLPXSCHM']))
        sys.exit(1)

    # Add unshared healpix as empty data
    for key in sorted(data):
        key2 = (key + 1) % 2
        w = np.logical_not(np.in1d(data[key2]['HEALPID'], data[key]['HEALPID']))
        if w.sum() > 0:
            new_healpix = data[key2]['HEALPID'][w]
            num_new_healpix = new_healpix.size
            num_bins = data[key]['DA'].shape[1]
            userprint(("Some healpix are unshared in data {}: "
                       "{}").format(key, new_healpix))
            padding = np.zeros((num_new_healpix, num_bins))
            data[key]['DA'] = np.append(data[key]['DA'], padding, axis=0)
            data[key]['WE'] = np.append(data[key]['WE'], padding, axis=0)
            data[key]['HEALPID'] = np.append(data[key]['HEALPID'], new_healpix)

    # Sort the data by the healpix values so that rows of both sets match
    for key in sorted(data):
        sort = np.array(data[key]['HEALPID']).argsort()
        data[key]['DA'] = data[key]['DA'][sort]
        data[key]['WE'] = data[key]['WE'][sort]
        data[key]['HEALPID'] = data[key]['HEALPID'][sort]

    # Append the data
    xi = np.append(data[0]['DA'], data[1]['DA'], axis=1)
    weights = np.append(data[0]['WE'], data[1]['WE'], axis=1)

    # Compute the covariance
    covariance = compute_cov(xi, weights)

    # Get the cross-covariance: rows of data 1, columns of data 2
    num_bins = data[0]['DA'].shape[1]
    cross_covariance = covariance[:num_bins, num_bins:].copy()

    ### Get the cross-correlation
    var = np.diagonal(covariance)
    cor = covariance / np.sqrt(var * var[:, None])
    cross_correlation = cor[:num_bins, num_bins:].copy()

    ### Test if valid
    try:
        scipy.linalg.cholesky(covariance)
    except scipy.linalg.LinAlgError:
        userprint('WARNING: Matrix is not positive definite')

    ### Save
    with fitsio.FITS(args.out, 'rw', clobber=True) as results:
        results.write([cross_covariance, cross_correlation],
                      names=['CO', 'COR'],
                      comment=['Covariance matrix', 'Correlation matrix'],
                      extname='COVAR')