def process_2mass_vot(input_file, out_dir=dirconfig.proc_2mass):
    """
    Convert a 2MASS VOTable catalog into a FITS table restricted to its tile.

    The tile name is the part of the file name that precedes the first
    underscore. Only sources that lie inside the galactic (l, b) limits of
    that tile and whose ``Qflg`` equals ``'AAA'`` are written out.

    :type out_dir: string
    :param input_file: path to the 2MASS file (in vot format)
    :param out_dir: output path
    :return:
    """

    # Check if files exist
    if files_exist(input_file):
        print(f'Processing file {input_file}')

    table = Table.read(input_file, format='votable')

    # Tile name and number are encoded in the file name
    filename = path.splitext(path.basename(input_file))[0]
    tile_name = filename.partition("_")[0]
    tile_num = tile_name.replace('t', '')
    tile = objects.all_tiles[tile_name]

    # Unique object ID (1-based, zero padded)
    table['id'] = [f'2mass-{tile_name}_{number:07d}'
                   for number in range(1, len(table) + 1)]

    # Add columns with magnitudes in VVV photometric system
    transformation_2mass_to_vista(table)

    # Galactic coordinates derived from the equatorial ones
    for coord_name in ('RAJ2000', 'DEJ2000'):
        table[coord_name].unit = u.deg
    galactic = SkyCoord(ra=table['RAJ2000'], dec=table['DEJ2000']).galactic
    table['l'] = galactic.l
    table['b'] = galactic.b

    # Select objects in the area of interest with the best quality flag
    inside_l = (table['l'] >= tile.lmin) & (table['l'] <= tile.lmax)
    inside_b = (table['b'] >= tile.bmin) & (table['b'] <= tile.bmax)
    good_quality = table['Qflg'] == 'AAA'
    selected = inside_l & inside_b & good_quality

    now = datetime.utcnow()
    table.meta = {
        'TILE': int(tile_num),
        'F2MASS': input_file,
        'STAGE': 'process_2mass_vot',
        'CATYPE': '2mass',
        'CDATE': now.strftime('%Y-%m-%d'),
        'CTIME': now.strftime('%H:%M:%S'),
        'AUTHOR': 'Jorge Anais',
    }

    out_path = path.join(out_dir, filename + '.fits')
    write_fits_table(table[selected], out_path)
def process_gaia_vot(input_file, out_dir=dirconfig.proc_gaia, features=None):
    """
    Extract the relevant features from a Gaia VOTable (result of a Gaia query)
    and save them as a smaller, easier to handle FITS file.

    Only sources whose ``parallax`` value is not masked are kept.

    :param features: A list with desired features from Gaia. When None, a
                     default set of astrometric and photometric columns is used.
    :param input_file: path to the Gaia file (expected to end in ``_gaia.vot.gz``)
    :param out_dir: output directory
    """

    # Check if files exist
    if files_exist(input_file):
        print(f'Processing file {input_file}')

    # Default features to be extracted from gaia votable
    if features is None:
        features = ['id',
                    'ra', 'ra_error', 'dec', 'dec_error',
                    'l', 'b',
                    'parallax', 'parallax_error', 'parallax_over_error',
                    'pmra', 'pmra_error', 'pmdec', 'pmdec_error',
                    'phot_g_mean_flux', 'phot_g_mean_flux_error',
                    'phot_g_mean_flux_over_error', 'phot_g_mean_mag',
                    'phot_bp_mean_flux', 'phot_bp_mean_flux_error',
                    'phot_bp_mean_flux_over_error', 'phot_bp_mean_mag',
                    'phot_rp_mean_flux', 'phot_rp_mean_flux_error',
                    'phot_rp_mean_flux_over_error', 'phot_rp_mean_mag',
                    'phot_bp_rp_excess_factor',
                    'bp_rp', 'bp_g', 'g_rp',
                    'radial_velocity', 'radial_velocity_error',
                    'source_id']

    # Read table
    tbl = Table.read(input_file, format='votable')

    base_name = path.basename(input_file)
    tile_name = base_name.replace('_gaia.vot.gz', '')
    tile_num = tile_name.replace('t', '')

    # Unique object ID, assigned before any row is discarded
    tbl['id'] = [f'gaia-{tile_name}_{number:07d}'
                 for number in range(1, len(tbl) + 1)]

    # Keep only sources with parallax, then only the requested columns
    has_parallax = ~tbl['parallax'].mask
    filtered_tbl = tbl[has_parallax][features]

    filename_out = path.join(out_dir, base_name.replace('.vot.gz', '') + '.fits')
    print(f'Writing file: {filename_out}')

    now = datetime.utcnow()
    filtered_tbl.meta = {
        'TILE': int(tile_num),
        'FGAIA': input_file,
        'STAGE': 'process_gaia_vot',
        'CATYPE': 'gaia',
        'CDATE': now.strftime('%Y-%m-%d'),
        'CTIME': now.strftime('%H:%M:%S'),
        'AUTHOR': 'Jorge Anais',
    }

    write_fits_table(filtered_tbl, filename_out)
def plot_clustered_data(table, output_dir=dirconfig.test_knowncl, summarized_scores_table=None):
    """
    Visualize the result of a clustering run and save the figure and the table.

    The figure contains the following panels:
        - l vs b
        - J-H vs H-Ks
        - Ks vs J-Ks
        - Q vs Ks
        - PM_dec vs PM_ra (only if 'pmra', 'pmdec' and 'plx' are present)
        - parallax histogram (only if 'pmra', 'pmdec' and 'plx' are present)

    Side effects: ``table`` is sorted in place by ``label`` and gains a
    ``color`` column. A ``.png`` and a ``.fits`` file with the same base name
    are written to ``output_dir``.

    :param table: An astropy table produced by apolo.clustering.ctools.do_hdbscan()
                  function. It must contain the columns 'l', 'b', 'mag_Ks',
                  'H-Ks', 'J-Ks', 'J-H', 'Q', 'label' and 'probabilities', and
                  the metadata key 'CEXCESS'.
    :param output_dir: string. Path to a dir where output are saved
    :param summarized_scores_table: optional table; the first row of its columns
                                    'score', 'ms', 'mcs_start' and 'mcs_end' is
                                    appended to the figure title.
    :raises ValueError: if any of the minimum required columns is missing.
    :raises KeyError: if 'CEXCESS' is missing from the table metadata.
    :return:
    """

    # Check if the minimum set of columns exists
    cols_min = ('l', 'b', 'mag_Ks', 'H-Ks', 'J-Ks', 'J-H', 'Q')
    if not all(col in table.columns for col in cols_min):
        raise ValueError('Table does not contain all requested columns')

    # Proper motion panels are only drawn when all three columns are available
    cols_pm = ('pmra', 'pmdec', 'plx')
    proper_motions = all(col in table.columns for col in cols_pm)

    table.sort('label')

    labels = table['label']
    probabilities = table['probabilities']
    unique_labels = np.unique(labels)
    cluster_number = len(unique_labels)

    # One color per label; noise (negative label) is gray. Every member color
    # is desaturated according to its membership probability.
    color_palette = sns.color_palette('bright', cluster_number)
    cluster_colors = [color_palette[x] if x >= 0 else (0.5, 0.5, 0.5) for x in labels]
    table['color'] = [sns.desaturate(color, prob)
                      for color, prob in zip(cluster_colors, probabilities)]

    noise = table[table['label'] == -1]
    clust = table[table['label'] != -1]

    # Read metadata
    properties = {}
    superior_title = ''
    object_name = table.meta['OBJNAME'] if 'OBJNAME' in table.meta else ''

    if 'ALGORIT' in table.meta and table.meta['ALGORIT'] == 'hdbscan':
        superior_title += object_name + '\n'
        properties['MCS'] = table.meta['MCS']
        properties['MS'] = table.meta['MS']
        properties['CSM'] = table.meta['CSELMTD']
        properties['SP'] = table.meta['SPARAMS']
        for score_key in ('SCORE', 'CH_SCORE', 'MAXSCORE'):
            if score_key in table.meta:
                properties[score_key] = round(table.meta[score_key], 4)
        properties['FILE'] = path.basename(table.meta['FILE'])
    else:
        superior_title += 'No metadata available'

    for key, value in properties.items():
        superior_title += key + '=' + str(value) + ' '

    if summarized_scores_table is not None:
        max_score = summarized_scores_table['score'][0]
        ms = summarized_scores_table['ms'][0]
        mcs_start = summarized_scores_table['mcs_start'][0]
        mcs_end = summarized_scores_table['mcs_end'][0]
        superior_title += f'\n max_score: {max_score:.6f} ms: {ms} mcs_range: {int(mcs_start)} - {int(mcs_end)}\n'

    # -----Visualization-----
    my_dpi = 100
    fig = plt.figure(figsize=(1920 / my_dpi, 1080 / my_dpi), dpi=my_dpi)

    # Common parameters for plt.scatter
    kargs_noise = dict(s=50, linewidth=0, alpha=0.10, marker='.')
    kargs_cl = dict(s=50, linewidth=0, alpha=0.50, marker='.')

    # Title
    plt.suptitle(superior_title, fontsize='large')

    # l b
    ax1 = plt.subplot(231)
    plt.xlabel('l, deg', fontweight='bold')
    plt.ylabel('b, deg', fontweight='bold')
    plt.scatter(noise['l'], noise['b'], c=noise['color'], **kargs_noise)
    plt.scatter(clust['l'], clust['b'], c=clust['color'], **kargs_cl)
    # Galactic longitude is drawn increasing to the left
    xmin, xmax = plt.xlim()
    plt.xlim(xmax, xmin)

    # Plot a circle with the angular size of the known object
    if 'OBJNAME' in table.meta:
        cluster = objects.all_regions[table.meta['OBJNAME']]
        r = cluster.asize.to_value(unit=u.deg) / 2.
        l_cluster = cluster.coord.l.to_value(unit=u.deg)
        b_cluster = cluster.coord.b.to_value(unit=u.deg)
        theta = np.linspace(0, 2 * np.pi, 100)
        x1 = r * np.cos(theta) + l_cluster
        x2 = r * np.sin(theta) + b_cluster
        plt.plot(x1, x2, color=(0, .7, 0))
    ax1.set_aspect('equal')

    # J-H vs H-Ks
    plt.subplot(232)
    plt.scatter(noise['H-Ks'], noise['J-H'], c=noise['color'], **kargs_noise)
    plt.scatter(clust['H-Ks'], clust['J-H'], c=clust['color'], **kargs_cl)
    plt.xlabel('(H - Ks), mag', fontweight='bold')
    plt.ylabel('(J - H), mag', fontweight='bold')
    plt.xlim(-0.5, 2.5)
    plt.ylim(0.3, 4.5)

    # Ks vs J-Ks (fixed, inverted magnitude axis)
    plt.subplot(233)
    plt.scatter(noise['J-Ks'], noise['mag_Ks'], c=noise['color'], **kargs_noise)
    plt.scatter(clust['J-Ks'], clust['mag_Ks'], c=clust['color'], **kargs_cl)
    plt.xlabel('(J - Ks), mag', fontweight='bold')
    plt.ylabel('Ks, mag', fontweight='bold')
    plt.xlim(0.0, 6.0)
    plt.ylim(18, 8)

    # Q vs Ks (fixed, inverted magnitude axis)
    ce = table.meta['CEXCESS']
    plt.subplot(234)
    plt.scatter(noise['Q'], noise['mag_Ks'], c=noise['color'], **kargs_noise)
    plt.scatter(clust['Q'], clust['mag_Ks'], c=clust['color'], **kargs_cl)
    plt.xlabel(f'Q=(J-H)-{ce:.2f}(H-Ks), mag', fontweight='bold')
    plt.ylabel('Ks, mag', fontweight='bold')
    plt.xlim(-1.05, 1.05)
    plt.ylim(18, 8)

    if proper_motions:
        # proper motions
        plt.subplot(235)
        plt.scatter(noise['pmra'], noise['pmdec'], c=noise['color'], **kargs_noise)
        plt.scatter(clust['pmra'], clust['pmdec'], c=clust['color'], **kargs_cl)
        plt.xlabel('pmra, mas/yr', fontweight='bold')
        plt.ylabel('pmdec, mas/yr', fontweight='bold')
        plt.xlim(-10.1, 6.1)
        plt.ylim(-10.1, 6.1)

        # parallax histogram, one per cluster
        plt.subplot(236)
        kwargs = dict(histtype='stepfilled', alpha=0.4, ec="k")
        # Iterate over the labels actually present instead of assuming that a
        # noise label exists; otherwise the last cluster is skipped when the
        # clustering produced no noise.
        for label in unique_labels:
            if label < 0:
                continue
            mask = table['label'] == label
            legend = f'{label}: {np.count_nonzero(mask)}'
            parallax = table['plx'][mask]
            plt.hist(parallax, bins=3, label=legend, color=color_palette[label], **kwargs)
        plt.xlabel('VIRAC plx, mas', fontweight='bold')
        plt.legend(prop={'size': 10})

    # Create filename based on object-name or time if not provided
    if object_name:
        filename_base = object_name
    elif 'TILENAME' in table.meta:
        filename_base = table.meta['TILENAME']
    else:
        filename_base = str(time.time())

    # Add hyper-parameters to the filename only when ALL of them are available
    hyper_parameter_keys = ('SPARAMS', 'MCS', 'MS', 'CSELMTD')
    if all(key in table.meta for key in hyper_parameter_keys):
        filename_base = f"{filename_base}_{table.meta['SPARAMS']}_{table.meta['MCS']:1.0f}_{table.meta['MS']:1.0f}_{table.meta['CSELMTD']}"

    # Save plot as .png image
    filename_plot = path.join(output_dir, filename_base + '.png')
    plt.savefig(filename_plot, format='png')
    # Close (not just clear) the figure so repeated calls do not accumulate
    # open figures in memory.
    plt.close(fig)

    # Save table as fits
    filename_table = path.join(output_dir, filename_base + '.fits')
    write_fits_table(table, filename_table)
def rectangular_tiling(table, l_grid, b_grid, partitioning_id=0, write_fits=False,
                       output_dir=dirconfig.test_tiling, log_table=None):
    """
    This function receives two arrays with the limits of the grid in both
    coordinates (l, b) and produces the tiling over the astropy table given.

    Tiles are half-open intervals, ``l_min <= l < l_max`` and
    ``b_min <= b < b_max``, so a source lying exactly on the last edge of the
    grid is not assigned to any tile.

    Parameters
    ----------
    table:
        An astropy table with columns 'l' and 'b'.
    l_grid:
        a sorted numpy array with the values of the edges of the tiles in l
    b_grid:
        a sorted numpy array with the values of the edges of the tiles in b
    partitioning_id:
        integer, used to identify tile partitioning from other partitioning
    write_fits:
        Boolean, gives the option to write the fits file to a output dir.
        The log is only filled and written when this option is enabled.
    output_dir:
        string, path where fits files are saved.
    log_table:
        An astropy table used as log. If None (default) a new empty table with
        columns ('tile', 'n', 'l_min', 'l_max', 'b_min', 'b_max', 'area') is
        created on every call, so rows never leak from one call to the next.

    Returns
    -------
    A dictionary with all the tile objects produced, keyed by tile id
    """

    # A fresh log per call: a Table built in the signature would be shared by
    # every call that relies on the default.
    if log_table is None:
        log_table = Table(names=('tile', 'n', 'l_min', 'l_max', 'b_min', 'b_max', 'area'))

    tiles_objects_dict = {}
    tile_number = 0

    for l_min, l_max in zip(l_grid[:-1], l_grid[1:]):
        for b_min, b_max in zip(b_grid[:-1], b_grid[1:]):
            tile_id = f'bf{partitioning_id}_{tile_number:04d}'
            print(tile_id)
            tiles_objects_dict[tile_id] = models.Tile(tile_id, l_min, l_max, b_min, b_max)

            if write_fits:
                mask = ((table['l'] >= l_min) & (table['l'] < l_max)
                        & (table['b'] >= b_min) & (table['b'] < b_max))
                table_portion = table[mask]
                write_fits_table(table_portion,
                                 path.join(output_dir, f'tile_{tile_id}.fits'))

                # Log: area of the tile on the sphere, in square degrees
                area = (l_max - l_min) * (np.sin(b_max * np.pi / 180.0)
                                          - np.sin(b_min * np.pi / 180.0)) * 180.0 / np.pi
                log_table.add_row([tile_number, len(table_portion),
                                   l_min, l_max, b_min, b_max, area])

            tile_number += 1

    if write_fits:
        log_table.write(path.join(output_dir, f'log_tiling_bf{partitioning_id}'),
                        format='ascii.ecsv')

    return tiles_objects_dict
def process_combis_csv(input_file, out_dir=dirconfig.proc_combis, combis_phot=False):
    """
    Transform a combis catalog (with proper motions) from a csv file into a
    fits file, adding an object id, galactic coordinates and colors.

    Rows without ``pmra`` or ``pmdec`` are always discarded. The tile number is
    taken from the file name (text before the first underscore, without 'd').

    :param combis_phot: Consider only data with complete photometrical
                        information in bands mj, mh and mk. In that case the
                        photometric columns are also renamed to VVV-like names.
    :param input_file: String. Path to the combi catalog (*.csv file)
    :param out_dir: String. Output directory
    :return:
    """

    # Check if files exist
    if files_exist(input_file):
        print(f'Processing file {input_file}')

    table = Table.read(input_file, format='csv')
    mask_nan_values(table)

    # Filename and tile number
    filename = path.splitext(path.basename(input_file))[0]
    tile_num = filename.split('_')[0].replace('d', '')

    # Unique Object ID
    table['id'] = [f'combi-t{tile_num}_{number:07d}'
                   for number in range(1, len(table) + 1)]

    # Create l and b columns
    for coord_name in ('ra', 'dec'):
        table[coord_name].unit = u.deg
    galactic = SkyCoord(ra=table['ra'], dec=table['dec']).galactic
    table['l'] = galactic.l
    table['b'] = galactic.b

    # Rows to keep: proper motions present (and, optionally, full photometry)
    keep = ~table['pmra'].mask & ~table['pmdec'].mask
    if combis_phot:
        # Special case, Only consider rows with complete photometric info
        keep &= ~table['mj'].mask & ~table['mh'].mask & ~table['mk'].mask

    aux = table[keep]

    # Create colors
    for bluer, redder in (('mh', 'mk'), ('mj', 'mk'), ('mj', 'mh')):
        aux[f'{bluer}-{redder}'] = aux[bluer] - aux[redder]

    now = datetime.utcnow()
    metadata = {
        'TILE': int(tile_num),
        'FCOMBI': input_file,
        'STAGE': 'process_combis_csv',
        'CATYPE': 'combi',
        'CDATE': now.strftime('%Y-%m-%d'),
        'CTIME': now.strftime('%H:%M:%S'),
        'AUTHOR': 'Jorge Anais',
    }

    if combis_phot:
        # Replace column names with vvv-like ones
        vvvlike_names = {
            'mj': 'mag_J',
            'mh': 'mag_H',
            'mk': 'mag_Ks',
            'mh-mk': 'H-Ks',
            'mj-mk': 'J-Ks',
            'mj-mh': 'J-H',
        }
        for current_name, new_name in vvvlike_names.items():
            aux.rename_column(current_name, new_name)
        metadata['CATYPE'] = 'combisphot'

    aux.meta = metadata

    out = path.join(out_dir, filename + '.fits')
    write_fits_table(aux, out)
def process_vvv_cals(input_file, out_dir=dirconfig.proc_vvv):
    """
    Transform a raw PSF tile in plain text format (.cals) into a fits file,
    removing the sources that do not have all of J, H and Ks magnitudes.

    Fits files are handled with much better performance in python than
    plain-text files. For some reason, reading .cals files uses a lot of
    memory (~6 gb).

    :param input_file: String. Path to the .cals file
    :param out_dir: output directory
    :raises KeyError: if the input table lacks any of the expected columns
    :return:
    """

    # Check if files exist
    if files_exist(input_file):
        print(f'Processing file {input_file}')

    table = Table.read(input_file, format='ascii')

    # Check if all the columns exist in table
    expected_cols = ['ra', 'dec',
                     'mag_Z', 'er_Z', 'mag_Y', 'er_Y',
                     'mag_J', 'er_J', 'mag_H', 'er_H', 'mag_Ks', 'er_Ks']
    if any(col not in table.columns for col in expected_cols):
        raise KeyError(f'Table does not contain all expected columns: {expected_cols}')

    # Tile number: file name without the 'zyjhk' tag and without extension
    stripped_name = path.basename(input_file).replace('zyjhk', '')
    tile_num = path.splitext(stripped_name)[0]

    # Unique Object ID
    table['id'] = [f'vvv-t{tile_num}_{number:07d}'
                   for number in range(1, len(table) + 1)]

    # Create l and b columns
    for coord_name in ('ra', 'dec'):
        table[coord_name].unit = u.deg
    galactic = SkyCoord(ra=table['ra'], dec=table['dec']).galactic
    table['l'] = galactic.l
    table['b'] = galactic.b

    # Keep sources with complete J, H, Ks photometry and create colors
    complete = ~table['mag_J'].mask & ~table['mag_H'].mask & ~table['mag_Ks'].mask
    aux = table[complete]
    for bluer, redder in (('H', 'Ks'), ('J', 'Ks'), ('J', 'H')):
        aux[f'{bluer}-{redder}'] = aux[f'mag_{bluer}'] - aux[f'mag_{redder}']

    # Save aux table as fits file with metadata
    now = datetime.utcnow()
    aux.meta = {
        'TILE': int(tile_num),
        'FVVV': input_file,
        'STAGE': 'VVV cals file to fits',
        'CATYPE': 'vvv',
        'CDATE': now.strftime('%Y-%m-%d'),
        'CTIME': now.strftime('%H:%M:%S'),
        'AUTHOR': 'Jorge Anais',
    }

    output_columns = ('id', 'ra', 'dec', 'l', 'b',
                      'mag_Z', 'er_Z', 'mag_Y', 'er_Y',
                      'mag_J', 'er_J', 'mag_H', 'er_H', 'mag_Ks', 'er_Ks',
                      'H-Ks', 'J-Ks', 'J-H')
    out_path = path.join(out_dir, 't' + tile_num + '_vvv.fits')
    write_fits_table(aux[output_columns], out_path)
import numpy as np

"""
This script performs a spatial tiling over a catalog and produces as many
files as tiles, using a density-aware approach (kd-tree method).
"""

tiling_dir = dirconfig.test_tiling

table = read_fits_table(path.join(tiling_dir, 'complete_region.fits'))

# Number of tiles produced for each leaf size is noted on the right
kd_tree_tiling(table, leaf_size=5000)  # 4096
# kd_tree_tiling(table, leaf_size=10000)  # 2048
# kd_tree_tiling(table, leaf_size=20000)  # 1024

n_tiles = max(table['tile']) + 1

log_columns = ('tile', 'n', 'l_min', 'l_max', 'b_min', 'b_max', 'area')
log_table = Table(names=log_columns)

for tile in range(n_tiles):
    tile_selection = table[table['tile'] == tile]

    tile_filename = f'tile_{tile:04d}.fits'
    print(tile_filename)
    write_fits_table(tile_selection, path.join(tiling_dir, tile_filename))

    # Bounding box of the sources in this tile and its area on the sphere
    n = len(tile_selection)
    longitudes = tile_selection['l']
    latitudes = tile_selection['b']
    l_min, l_max = longitudes.min(), longitudes.max()
    b_min, b_max = latitudes.min(), latitudes.max()
    sin_b_max = np.sin(b_max * np.pi / 180.0)
    sin_b_min = np.sin(b_min * np.pi / 180.0)
    area = (l_max - l_min) * (sin_b_max - sin_b_min) * 180.0 / np.pi

    log_table.add_row([tile, n, l_min, l_max, b_min, b_max, area])

log_table.write(path.join(tiling_dir, 'log_tiling_4096.ecsv'), format='ascii.ecsv')
from os import path

from apolo.catalog_proc.utils import read_fits_table, write_fits_table
from apolo.data import dirconfig
from apolo.tiling.tools import join_tiles

# Script: join pairs of cleaned tile catalogs and write each joined table to
# the tiling test directory as '<first>_<second>.fits'.

# Suffix shared by every cleaned input catalog
CLEAN_SUFFIX = '_vvv-2mass-combi-gaia_clean.fits'

# Pairs of tiles to be joined
TILE_PAIRS = (
    ('t067', 't105'),
    ('t068', 't106'),
    ('t069', 't107'),
    ('t070', 't108'),
)

for first_tile, second_tile in TILE_PAIRS:
    # Every catalog is read from the same cross-matched directory; building the
    # path in one place keeps all inputs consistent.
    first_table = read_fits_table(
        path.join(dirconfig.cross_vvv_combis_gaia, first_tile + CLEAN_SUFFIX))
    second_table = read_fits_table(
        path.join(dirconfig.cross_vvv_combis_gaia, second_tile + CLEAN_SUFFIX))

    joined_table = join_tiles(first_table, second_table)
    joined_file = path.join(dirconfig.test_tiling, f'{first_tile}_{second_tile}.fits')
    write_fits_table(joined_table, joined_file)
def add_proper_motions(phot_file, pm_file, out_dir=dirconfig.test_knowncl):
    """
    Match a proper motion catalog against a VVV clean catalog and save the
    matched sources as a fits file.

    A photometric source is matched to its nearest proper-motion source when
    they are closer than 0.34 arcsec. When several photometric sources match
    the same proper-motion source, all of them are omitted from the output.

    :param pm_file: path to the proper motion catalog (fits)
    :param phot_file: path to the photometric catalog (fits)
    :param out_dir: output directory
    :raises ValueError: if both files do not have the same 'TILE' metadata
    :return:
    """

    # Check if files exist
    if files_exist(pm_file, phot_file):
        print('Processing files:', phot_file, pm_file)

    # Read tables
    tbl_phot = read_fits_table(phot_file)
    tbl_pm = read_fits_table(pm_file)

    # Check if tile numbers match
    if not tbl_phot.meta['TILE'] == tbl_pm.meta['TILE']:
        raise ValueError('Files do not correspond to the same tile')

    # Cross-match
    cphot = SkyCoord(tbl_phot['ra'], tbl_phot['dec'])
    cpm = SkyCoord(tbl_pm['ra'], tbl_pm['dec'])
    idx, d2d, d3d = cphot.match_to_catalog_sky(cpm)
    match = d2d < 0.34 * u.arcsec

    # Remove duplicated matches
    # In this case we prefer to omit sources with duplicated matches
    unique_idx, count = np.unique(idx[match], return_counts=True)
    is_duplicated = count > 1
    # Count the surplus matches BEFORE they are removed from the mask;
    # computed afterwards the value would always be zero.
    n_duplicated = int(np.sum(count[is_duplicated] - 1))
    match[np.isin(idx, unique_idx[is_duplicated])] = False

    # Join only the matched rows (avoids building the full-size joined table)
    match_table = hstack([tbl_phot[match], tbl_pm[idx[match]]],
                         uniq_col_name='{col_name}{table_name}',
                         table_names=['', '_pm'])

    # Setup names and output file
    tile_number = tbl_phot.meta['TILE']
    catype = tbl_phot.meta['CATYPE'] + '-' + tbl_pm.meta['CATYPE']
    date_time = datetime.utcnow()

    match_table.meta = {
        'TILE': tile_number,
        'FCOMBI': pm_file,
        'FPHOT': phot_file,
        'STAGE': 'add_proper_motions',
        'CATYPE': catype,
        'NPHOT': len(tbl_phot),
        'NCOMBI': len(tbl_pm),
        'NDUPL': n_duplicated,
        'CDATE': date_time.strftime('%Y-%m-%d'),
        'CTIME': date_time.strftime('%H:%M:%S'),
        'AUTHOR': 'Jorge Anais',
    }

    # Save file
    fname = f't{tile_number:03d}_{catype}.fits'
    outfile = path.join(out_dir, fname)
    write_fits_table(match_table, outfile)
def gaia_cleaning(fname_phot, fname_gaia,
                  clean_dir=dirconfig.cross_vvv_gaia, cont_dir=dirconfig.cross_vvv_gaia_cont,
                  save_contam=True, distance=1.0):
    """
    Match gaia sources against VVV sources and remove nearby contaminants.

    Gaia sources with ``parallax >= 1 / distance`` (i.e. closer than
    ``distance``) are cross-matched with the photometric catalog using a
    0.34 arcsec radius. Photometric sources with a one-to-one match are
    considered contaminants and removed. Two tables are generated: the cleaned
    catalog and, optionally, the contaminants.

    :param fname_phot: String, path to the catalog to be cleaned
    :param fname_gaia: String, path to the gaia catalog
    :param clean_dir: String, output dir
    :param cont_dir: String, output dir for contaminants
    :param save_contam: Boolean, whether the contaminants table is written
    :param distance: Float, distance in kpc (presumably paired with parallaxes
                     in mas; the code only uses ``1.0 / distance`` as threshold)
    :raises ValueError: if both files do not have the same 'TILE' metadata
    :return:
    """

    # Check if files exist
    if files_exist(fname_phot, fname_gaia):
        print('Processing files: ', fname_phot, fname_gaia)

    # Load tables
    tbl_phot = read_fits_table(fname_phot)
    tbl_gaia = read_fits_table(fname_gaia)

    # Check if tile match
    if not tbl_phot.meta['TILE'] == tbl_gaia.meta['TILE']:
        raise ValueError('Files do not correspond to the same tile')

    tile_number = tbl_phot.meta['TILE']

    # Apply threshold to gaia data
    threshold = 1.0 / distance
    match_parallax = tbl_gaia['parallax'] >= threshold
    tbl_gaia_par = tbl_gaia[match_parallax]

    # Cross-match
    cphot = SkyCoord(tbl_phot['ra'], tbl_phot['dec'])
    cgaia = SkyCoord(tbl_gaia_par['ra'], tbl_gaia_par['dec'])
    idx, d2d, d3d = cphot.match_to_catalog_sky(cgaia)
    match = d2d < 0.34 * u.arcsec

    # Remove duplicated matches
    # We only consider sources with 1 to 1 match
    unique_idx, count = np.unique(idx[match], return_counts=True)
    is_duplicated = count > 1
    # Count the surplus matches BEFORE they are removed from the mask;
    # computed afterwards the value would always be zero.
    n_duplicated = int(np.sum(count[is_duplicated] - 1))
    match[np.isin(idx, unique_idx[is_duplicated])] = False

    # Catalog with contaminants (objects that are closer than "distance").
    # Only the matched rows are joined, avoiding a full-size joined table.
    contam_table = hstack([tbl_phot[match], tbl_gaia_par[idx[match]]])

    # Add metadata
    catype = tbl_phot.meta['CATYPE'] + '-' + tbl_gaia.meta['CATYPE']
    date_time = datetime.utcnow()

    contam_table.meta = {
        'TILE': int(tile_number),
        'FGAIA': fname_gaia,
        'FPHOT': fname_phot,
        'STAGE': 'gaia_cleaning',
        'CATYPE': catype + 'CONT',
        'CDATE': date_time.strftime('%Y-%m-%d'),
        'CTIME': date_time.strftime('%H:%M:%S'),
        'DIST': distance,
        'SELECT': 'contaminants',
        'NDUPL': n_duplicated,
        'AUTHOR': 'Jorge Anais',
    }

    # Cleaned catalog (sources with duplicated matches are kept here)
    clean_catalog = tbl_phot[~match]

    clean_catalog.meta = {
        'TILE': int(tile_number),
        'FGAIA': fname_gaia,
        'FPHOT': fname_phot,
        'STAGE': 'gaia_cleaning',
        'CATYPE': catype,
        'CDATE': date_time.strftime('%Y-%m-%d'),
        'CTIME': date_time.strftime('%H:%M:%S'),
        'DIST': distance,
        'SELECT': 'clean',
        'AUTHOR': 'Jorge Anais',
    }

    # Save clean catalog to a fits file
    filename = f't{tile_number:03d}_{catype}'
    path_out = path.join(clean_dir, filename + '_clean.fits')
    write_fits_table(clean_catalog, path_out)

    # Save contaminants
    if save_contam:
        path_out = path.join(cont_dir, filename + '_contaminants.fits')
        write_fits_table(contam_table, path_out)
def combine_vvv_2mass(vvvpsf_file, twomass_file, out_dir=dirconfig.cross_vvv_2mass, max_error=1.00):
    """
    Add to the VVV-PSF catalog the 2MASS sources that have no VVV counterpart.

    A 2MASS source is considered unpaired when its nearest VVV-PSF source is
    farther than ``max_error`` arcsec. Unpaired 2MASS sources (using their
    VISTA-system magnitudes) are stacked on top of the VVV-PSF sources and the
    result is saved as a fits file.

    :param twomass_file: string, path to the processed 2MASS catalog
    :param vvvpsf_file: string, path to the processed VVV-PSF catalog
    :param out_dir: string, output directory
    :param max_error: number, matching radius in arcsec
    :raises ValueError: if both files do not have the same 'TILE' metadata
    :return:
    """

    # Check if files exist
    if files_exist(twomass_file, vvvpsf_file):
        print('Combining: ', vvvpsf_file, twomass_file)

    # Read catalogs
    twomass_table = read_fits_table(twomass_file)
    vvvpsf_table = read_fits_table(vvvpsf_file)

    # Check if tile match
    if not twomass_table.meta['TILE'] == vvvpsf_table.meta['TILE']:
        raise ValueError(f'Files do not correspond to the same tile')

    # Cross-match
    c2mass = SkyCoord(twomass_table['RAJ2000'], twomass_table['DEJ2000'], unit='deg')
    cvvv = SkyCoord(vvvpsf_table['ra'], vvvpsf_table['dec'], unit='deg')
    idx, d2d, d3d = c2mass.match_to_catalog_sky(cvvv)
    without_counterpart = d2d > max_error * u.arcsec

    # In this case repeated sources are not removed (otherwise they will be
    # included in the output catalog)
    unpaired = twomass_table[without_counterpart]

    # Table with the unpaired 2MASS sources, using the output column names
    unp_table = Table()
    twomass_columns = (
        ('ra', 'RAJ2000'), ('dec', 'DEJ2000'),
        ('l', 'l'), ('b', 'b'),
        ('mag_J', 'J_vista'), ('eJ', 'e_Jmag'),
        ('mag_H', 'H_vista'), ('eH', 'e_Hmag'),
        ('mag_Ks', 'Ks_vista'), ('eKs', 'e_Kmag'),
    )
    for new_name, source_name in twomass_columns:
        unp_table[new_name] = unpaired[source_name]
    for bluer, redder in (('H', 'Ks'), ('J', 'Ks'), ('J', 'H')):
        unp_table[f'{bluer}-{redder}'] = unpaired[f'{bluer}_vista'] - unpaired[f'{redder}_vista']
    unp_table['catalog'] = ['2MASS'] * len(unpaired)
    unp_table['id'] = unpaired['id']

    # Table with the VVV-PSF sources
    aux_table = Table()
    for name in ('ra', 'dec', 'l', 'b'):
        aux_table[name] = vvvpsf_table[name]
    # Z and Y bands: NaN values are turned into masked entries
    for name in ('mag_Z', 'er_Z', 'mag_Y', 'er_Y'):
        values = vvvpsf_table[name].data
        aux_table[name] = Table.MaskedColumn(values, mask=np.isnan(values))
    vvv_columns = (
        ('mag_J', 'mag_J'), ('eJ', 'er_J'),
        ('mag_H', 'mag_H'), ('eH', 'er_H'),
        ('mag_Ks', 'mag_Ks'), ('eKs', 'er_Ks'),
        ('H-Ks', 'H-Ks'), ('J-Ks', 'J-Ks'), ('J-H', 'J-H'),
    )
    for new_name, source_name in vvv_columns:
        aux_table[new_name] = vvvpsf_table[source_name]
    aux_table['catalog'] = ['PSF-VVV'] * len(vvvpsf_table)
    aux_table['id'] = vvvpsf_table['id']

    output_table = vstack([unp_table, aux_table])

    # Add metadata to the new file
    now = datetime.utcnow()
    tile = vvvpsf_table.meta['TILE']
    catype = vvvpsf_table.meta['CATYPE'] + '-' + twomass_table.meta['CATYPE']

    output_table.meta = {
        'TILE': tile,
        'F2MASS': twomass_file,
        'N2MASS': len(unp_table),
        'FVVV': vvvpsf_file,
        'NVVV': len(vvvpsf_table),
        'STAGE': 'combine_vvv_2mass',
        'CATYPE': catype,
        'CDATE': now.strftime('%Y-%m-%d'),
        'CTIME': now.strftime('%H:%M:%S'),
        'AUTHOR': 'Jorge Anais',
    }

    # Write output table
    output_file = path.join(out_dir, f't{tile:03d}_{catype}.fits')
    write_fits_table(output_table, output_file)