from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt
import numpy as np
from astropy.coordinates import SkyCoord
from lofarnn.data.cutouts import convert_to_valid_color
from astropy.io import fits
from astropy.table import Table
from lofarnn.models.dataloaders.utils import get_lotss_objects
from astropy import units as u
from radio_beam import Beam
from astropy.wcs import WCS
from astropy.wcs.utils import skycoord_to_pixel, proj_plane_pixel_scales

# Scratch inspection script: load a catalogue from a hard-coded local path and
# print a few summary values, then read two more catalogues into pandas.
lobjects = get_lotss_objects(
    "/home/bieker/Downloads/LOFAR_HBA_T1_DR1_merge_ID_optical_f_v1.2_restframe.fits"
)
lobjects.info()
# Smallest and largest value of the 'objID' column.
print(
    f"Min/Max: ObjID: {np.min(lobjects['objID'].data)}/{np.max(lobjects['objID'].data)}"
)
# NOTE(review): despite the "Min/Max" label this prints the whole 'AllWISE'
# column data, not a minimum/maximum.
print(f"Min/Max: PS: {lobjects['AllWISE'].data}")
#exit()
gauss_catalog = "/home/bieker/Downloads/LOFAR_HBA_T1_DR1_catalog_v0.99.gaus.fits"
component_catalog = "/home/bieker/Downloads/LOFAR_HBA_T1_DR1_merge_ID_v1.2.comp.fits"
# Use component Name from comp catalog to select gaussian
gauss_cat = Table.read(gauss_catalog).to_pandas()
# `component_catalog` is rebound here: it held the file path above and holds the
# loaded pandas DataFrame from this point on.
component_catalog = Table.read(component_catalog).to_pandas()
# NOTE(review): the triple quote below opens a string literal; its closing
# delimiter is not part of this span - confirm what it is meant to enclose.
"""
import matplotlib.pyplot as plt
import numpy as np
from lofarnn.models.dataloaders.utils import get_lotss_objects

# Quick-look script: for each magnitude column, save a histogram restricted to
# the 2nd-98th percentile range and print summary statistics, then exit.
dat = get_lotss_objects("/home/s2153246/data/dr2_combined.fits")
for col in ["MAG_R", "MAG_W1", "MAG_W2"]:
    # Start a fresh figure for every column. pyplot keeps drawing on the
    # current figure, so without this each saved PNG would also contain the
    # histograms of all previously processed columns.
    plt.figure()
    plt.hist(
        # NaNs are replaced (by 0.0) so that hist() accepts the data; the bin
        # edges themselves are computed with the NaN-aware percentile.
        np.nan_to_num(dat[col]),
        bins=np.arange(
            np.nanpercentile(dat[col], 2), np.nanpercentile(dat[col], 98), 0.5
        ),
    )
    plt.title(col)
    plt.savefig(f"Legacy_{col}.png", dpi=300)
    # Release the figure so figures do not accumulate in memory.
    plt.close()
    print(
        f"{col}: Max: {np.nanmax(dat[col])}, Min: {np.nanmin(dat[col])}, Median: {np.nanmedian(dat[col])}, "
        f"1st Percentile: {np.nanpercentile(dat[col], 1)}, 5th Percentile: {np.nanpercentile(dat[col], 5)}, 95th Percentile: {np.nanpercentile(dat[col], 95)}, 99th Percentile: {np.nanpercentile(dat[col], 99)}"
    )
exit()
def create_cnn_dataset(
    root_directory: str,
    counterpart_catalog: str = "",
    split_fraction: float = 0.2,
    resize: Optional[Union[Tuple[int], int]] = None,
    rotation: Optional[Union[List[float], float]] = None,
    convert: bool = True,
    bands: List[str] = (
        "iFApMag",
        "w1Mag",
        "gFApMag",
        "rFApMag",
        "zFApMag",
        "yFApMag",
        "w2Mag",
        "w3Mag",
        "w4Mag",
    ),
    vac_catalog: str = "",
    normalize: bool = True,
    subset: str = "",
    multi_rotate_only: Optional[Union[List[str], str]] = None,
    verbose: bool = False,
    **kwargs,
):
    """
    Build the CNN annotation files for the train, validation and test splits.

    The COCO-style directory layout is created under ``root_directory``, the
    contents of its "all" directory are split, and ``create_cnn_annotations``
    is then run once per output file: the validation split (only when it is
    non-empty), the training split without rotation
    (``cnn_train_test_norm...``), the test split, and finally the training
    split again with ``rotation`` applied (``cnn_train_norm...``).

    :param root_directory: root directory of the COCO-style dataset
    :param counterpart_catalog: forwarded to ``create_cnn_annotations`` as ``pan_wise_location``
    :param split_fraction: used as both the validation and the test fraction when splitting
    :param resize: image size to resize to, or None if not resizing
    :param rotation: rotation(s) forwarded only for the final training annotation pass
    :param convert: forwarded unchanged to ``create_cnn_annotations``
    :param bands: the bands to include for each source
    :param vac_catalog: forwarded to ``create_cnn_annotations`` as ``vac_catalog_location``
    :param normalize: forwarded to ``create_cnn_annotations``; also part of the output file names
    :param subset: if not empty, path to an array (read with ``np.load``) of entries to keep;
        each split is reduced to the entries found in it and the annotations are written
        to a "subset" sub-directory of the annotation directory
    :param multi_rotate_only: if set, catalogue passed to ``get_lotss_objects``; the names of
        sources whose association column is greater than 1 are forwarded as ``rotation_names``
    :param verbose: whether to print more data to stdout or not
    :param kwargs: ``association_name`` selects the association column (default "LGZ_Assoc")
    :return: None
    """
    directories = create_coco_style_directory_structure(root_directory,
                                                        verbose=verbose)
    (
        all_directory,
        train_directory,
        val_directory,
        test_directory,
        annotations_directory,
    ) = directories

    # Split everything found in the "all" directory
    data_split = split_data(all_directory,
                            val_split=split_fraction,
                            test_split=split_fraction)

    if subset:
        # Restrict every split to the entries listed in the subset file
        allowed_entries = np.load(subset, allow_pickle=True)
        for split_name in ("train", "test", "val"):
            members = data_split[split_name]
            data_split[split_name] = members[np.isin(members, allowed_entries)]
        annotations_directory = os.path.join(annotations_directory, "subset")

    multi_names = None
    if multi_rotate_only:
        catalogue = get_lotss_objects(multi_rotate_only, False)
        # Keep only the multicomponent sources
        association_column = kwargs.get("association_name", "LGZ_Assoc")
        catalogue = catalogue[catalogue[association_column] > 1]
        multi_names = catalogue["Source_Name"].data

    # One entry per annotation file: (split, image directory, file name, rotation)
    annotation_jobs = []
    if len(data_split["val"]) > 0:
        annotation_jobs.append(
            ("val", val_directory, f"cnn_val_norm{normalize}_extra.pkl", None))
    annotation_jobs.extend([
        ("train", train_directory,
         f"cnn_train_test_norm{normalize}_extra.pkl", None),
        ("test", test_directory, f"cnn_test_norm{normalize}_extra.pkl", None),
        ("train", train_directory, f"cnn_train_norm{normalize}_extra.pkl",
         rotation),
    ])

    # Arguments that are the same for every annotation pass
    shared_options = {
        "json_dir": annotations_directory,
        "pan_wise_location": counterpart_catalog,
        "resize": resize,
        "convert": convert,
        "normalize": normalize,
        "bands": bands,
        "vac_catalog_location": vac_catalog,
        "rotation_names": multi_names,
        "verbose": verbose,
    }
    for split_name, destination, annotation_file, angles in annotation_jobs:
        create_cnn_annotations(
            data_split[split_name],
            image_destination_dir=destination,
            json_name=annotation_file,
            rotation=angles,
            **shared_options,
        )
def create_source_dataset(
    cutout_directory: str,
    pan_wise_location: str,
    value_added_catalog_location: str,
    component_catalog_location: str,
    dr_two_location: str,
    bands: List[str] = (
        "iFApMag",
        "w1Mag",
        "gFApMag",
        "rFApMag",
        "zFApMag",
        "yFApMag",
        "w2Mag",
        "w3Mag",
        "w4Mag",
    ),
    fixed_size: Optional[Union[int, float]] = None,
    filter_lgz: bool = True,
    verbose: bool = False,
    use_multiprocessing: bool = False,
    strict_filter: bool = False,
    filter_optical: bool = True,
    no_source: bool = False,
    num_threads: Optional[int] = os.cpu_count(),
    **kwargs,
):
    """
    Filter the value added catalog and create cutouts for every mosaic it references.

    The catalog is loaded, optionally filtered (size, optical position, strict
    flux/size cut), and ``create_cutouts`` is then called once per distinct
    ``Mosaic_ID`` value, either serially or through a process pool. The number
    of remaining catalog rows is printed after each filtering step.

    :param cutout_directory: Directory to store the cutouts
    :param pan_wise_location: The location of the PanSTARRS-ALLWISE catalog
    :param value_added_catalog_location: Location of the LoTSS Value Added Catalog
    :param component_catalog_location: Location of the component catalog, loaded with
        ``get_lotss_objects`` and handed to ``create_cutouts``
    :param dr_two_location: The location of the LoTSS DR2 Mosaic Locations
    :param bands: The bands handed to ``create_cutouts``
    :param fixed_size: Handed to ``create_cutouts`` as ``source_size``; ``False`` is treated as None
    :param filter_lgz: Whether to filter on LGZ_Size
    :param verbose: Handed to ``create_cutouts``
    :param use_multiprocessing: Whether to use multiprocessing
    :param strict_filter: Use the same filtering as for Jelle's subsample, with total flux > 10 mJy, and size > 15 arcseconds
    :param filter_optical: Whether to filter out sources with only optical sources or not
    :param no_source: With ``filter_optical``, keep the rows whose optical position is NaN
        instead of the rows where it is not NaN
    :param num_threads: Number of threads to use, if multiprocessing is true
    :param kwargs: ``size_name``, ``optical_ra`` and ``optical_dec`` override the column names
        used for filtering; all keyword arguments are also forwarded to ``create_cutouts``
    :return: None
    """
    l_objects = get_lotss_objects(value_added_catalog_location, False)
    print(len(l_objects))
    size_name = kwargs.get("size_name", "LGZ_Size")
    optical_ra = kwargs.get("optical_ra", "ID_ra")
    optical_dec = kwargs.get("optical_dec", "ID_dec")
    if filter_lgz:
        l_objects = l_objects[~np.isnan(l_objects[size_name])]
        print(len(l_objects))
    if filter_optical:
        if no_source:
            l_objects = l_objects[np.isnan(l_objects[optical_ra])]
            l_objects = l_objects[np.isnan(l_objects[optical_dec])]
        else:
            l_objects = l_objects[~np.isnan(l_objects[optical_ra])]
            l_objects = l_objects[~np.isnan(l_objects[optical_dec])]
        print(len(l_objects))
    if strict_filter:
        l_objects = l_objects[l_objects[size_name] > 15.0]
        l_objects = l_objects[l_objects["Total_flux"] > 10.0]
        print(len(l_objects))
    mosaic_names = set(l_objects["Mosaic_ID"])
    print(len(l_objects))
    print(mosaic_names)
    comp_catalog = get_lotss_objects(component_catalog_location, False)
    # Create a directory structure identical for detectron2; only the "all"
    # directory is needed here, the cutouts are split later.
    (all_directory, _, _, _, _) = create_coco_style_directory_structure(
        cutout_directory)
    if fixed_size is False:
        fixed_size = None
    # Everything except the mosaic name is identical for every call, so it is
    # collected once and shared by the serial and the parallel branch.
    cutout_options = dict(
        value_added_catalog=l_objects,
        pan_wise_catalog=pan_wise_location,
        component_catalog=comp_catalog,
        mosaic_location=dr_two_location,
        save_cutout_directory=all_directory,
        bands=bands,
        source_size=fixed_size,
        verbose=verbose,
        **kwargs,
    )
    if use_multiprocessing:
        from functools import partial

        # The previous ``repeat(**kwargs)`` could never work:
        # ``itertools.repeat`` only accepts ``object`` and ``times``, and
        # ``starmap`` cannot forward keyword arguments. Binding them with
        # ``partial`` keeps the callable picklable; the mosaic name is passed
        # as the first positional argument, as before.
        worker = partial(create_cutouts, **cutout_options)
        # The context manager makes sure the worker processes are released.
        with multiprocessing.Pool(num_threads) as pool:
            pool.map(worker, mosaic_names)
    else:
        for mosaic in mosaic_names:
            create_cutouts(mosaic=mosaic, **cutout_options)
def _build_cnn_image(image_name, resize, normalize):
    """Load one saved cutout and build the 3-channel radio image and its WCS.

    The three channels are the first channel of the cutout converted in three
    ways: clipped with sqrt scaling, clipped without scaling, and unclipped /
    unnormalized with sqrt scaling.

    :param image_name: path of the saved cutout (``.npy``)
    :param resize: forwarded to ``augment_image_and_bboxes`` as ``new_size``
    :param normalize: forwarded to ``convert_to_valid_color`` for the two clipped channels
    :return: tuple ``(image, wcs)``
    """
    # NOTE: allow_pickle=True unpickles arbitrary objects - only use on
    # cutout files produced by this project.
    (image, cutouts, proposal_boxes,
     wcs) = np.load(image_name,
                    allow_pickle=True)  # mmap_mode might allow faster read
    print(image.shape)
    image = np.moveaxis(image, 0, 2)
    cutout = Cutout2D(
        image[:, :, 0],
        position=(int(image.shape[0] / 2), int(image.shape[1] / 2)),
        size=(int(image.shape[0]), int(image.shape[1])),
        wcs=wcs,
    )
    wcs = cutout.wcs
    image = np.nan_to_num(image)
    # Need this to convert the bbox coordinates into the correct format
    (image, cutouts, proposal_boxes) = augment_image_and_bboxes(
        image,
        cutouts=cutouts,
        proposal_boxes=proposal_boxes,
        angle=0,
        new_size=resize,
        verbose=False,
    )
    # First R (Radio) channel; copies are taken before any conversion so each
    # variant starts from the same untouched data.
    radio = image[:, :, 0]
    radio_clip = np.copy(radio)
    radio_none = np.copy(radio)
    radio = convert_to_valid_color(
        radio,
        clip=True,
        lower_clip=0.0,
        upper_clip=1000,
        normalize=normalize,
        scaling="sqrt",
    )
    radio_clip = convert_to_valid_color(
        radio_clip,
        clip=True,
        lower_clip=0.0,
        upper_clip=1000,
        normalize=normalize,
        scaling=None,
    )
    radio_none = convert_to_valid_color(
        radio_none,
        clip=False,
        normalize=False,
        scaling="sqrt",
    )
    # Convert back from masked arrays to normal arrays
    radio = np.ma.filled(radio, fill_value=0.0)
    radio_clip = np.ma.filled(radio_clip, fill_value=0.0)
    radio_none = np.ma.filled(radio_none, fill_value=0.0)
    # Now restack into 3 channel image
    return np.nan_to_num(np.dstack((radio, radio_clip, radio_none))), wcs


def _describe_optical_sources(objects, distances, angles, source, bands,
                              normalize):
    """Build the per-object feature lists and 0/1 labels for one radio source.

    :param objects: candidate catalogue rows, already sorted
    :param distances: separation per object, same order as ``objects``
    :param angles: angle per object, same order as ``objects``
    :param source: catalogue row(s) of the radio source; an object is labelled 1
        when both its ``objID`` and ``AllWISE`` equal the source's first entry
    :param bands: catalogue columns appended as features
    :param normalize: whether band values are clipped to [10, 28] and scaled to [0, 1]
    :return: tuple ``(optical_sources, optical_labels)``
    """
    optical_sources = []
    optical_labels = []
    for j, obj in enumerate(objects):
        # 999999 == '' in source for AllWISE
        print(
            f"Object: {obj['objID']} Source: {source['objID'].data[0]} \n {obj['AllWISE']} {source['AllWISE'].data[0]}"
        )
        is_counterpart = (obj["objID"] == source["objID"].data[0]
                          and obj["AllWISE"] == source["AllWISE"].data[0])
        optical_labels.append(1 if is_counterpart else 0)
        features = [
            obj["objID"],
            obj["AllWISE"],
            obj["ra"],
            obj["dec"],
            distances[j],
            angles[j],
            obj["z_best"],
        ]
        for layer in bands:
            value = np.nan_to_num(obj[layer])
            if normalize:
                # Scale to between 0 and 1 for 10 to 28 magnitude
                value = np.clip(value, 10.0, 28.0)
                value = (value - 10.0) / (28.0 - 10.0)
            features.append(value)
        optical_sources.append(features)
    return optical_sources, optical_labels


def make_single_cnn_set(
    image_names: List[Path],
    record_list: List[Any],
    set_number: int,
    image_destination_dir: Optional[str],
    pan_wise_location: str = "",
    bands: List[str] = (
        "iFApMag",
        "w1Mag",
        "gFApMag",
        "rFApMag",
        "zFApMag",
        "yFApMag",
        "w2Mag",
        "w3Mag",
        "w4Mag",
    ),
    resize: Optional[Union[int, List[int]]] = None,
    rotation: Optional[Union[List[float], float]] = None,
    convert: bool = False,
    vac_catalog_location: str = "",
    normalize: bool = True,
    **kwargs,
):
    """
    Create (or reuse) the CNN image and record for every cutout and append the records.

    For each entry of ``image_names`` the processed image, its WCS and a record
    dictionary are stored in ``image_destination_dir``; files that already exist
    are loaded instead of being recomputed. Every record is appended to
    ``record_list`` in place.

    :param image_names: paths of the saved cutouts; the file stem is matched against
        the ``Source_Name`` column of the value added catalog
    :param record_list: list that receives one record per image (mutated in place)
    :param set_number: index of this set; part of the file names when rotating and
        the index into ``rotation`` when that is a sequence
    :param image_destination_dir: directory for the image, WCS and record files
    :param pan_wise_location: FITS catalogue opened with ``fits.open``; HDU 1 is used
    :param bands: catalogue columns appended to every optical source entry
    :param resize: image size to resize to, or None if not resizing
    :param rotation: None, a single angle, or a sequence of angles indexed by ``set_number``
    :param convert: whether to target a ``.png`` image name (see the note in the body)
    :param vac_catalog_location: catalogue loaded with ``get_lotss_objects``
    :param normalize: whether image and band values are normalized; part of the file names
    :param kwargs: ``size_name`` selects the size column (default "LGZ_Size")
    :return: None
    """
    size_name = kwargs.get("size_name", "LGZ_Size")
    # Keep the FITS file open only for the duration of the loop, so the handle
    # is released again even if processing fails part-way through.
    with fits.open(pan_wise_location, memmap=True) as pan_wise_hdul:
        pan_wise_catalog = pan_wise_hdul[1].data
        vac_catalog = get_lotss_objects(vac_catalog_location, verbose=False)
        for i, image_name in enumerate(image_names):
            stem = image_name.stem
            record_dest_filename = os.path.join(
                image_destination_dir, stem + f".record.{normalize}.npy")
            if convert:
                # NOTE(review): this path does not set ``wcs_dest_filename``,
                # so the save/load of the WCS below fails with a NameError,
                # and ``np.save`` appends ".npy" to the ".png" name. Left
                # unchanged because the intended PNG behaviour is not clear
                # from this function - confirm before relying on convert=True.
                image_dest_filename = os.path.join(
                    image_destination_dir, stem + f".cnn.{set_number}.png")
            elif rotation is not None and np.any(rotation):
                # np.any accepts arrays, lists and plain floats alike, whereas
                # ``rotation.any()`` only exists on numpy arrays.
                image_dest_filename = os.path.join(
                    image_destination_dir,
                    stem + f".cnn.{set_number}.{normalize}.npy",
                )
                wcs_dest_filename = os.path.join(
                    image_destination_dir,
                    stem + f".cnn.{set_number}.{normalize}.wcs.npy",
                )
                record_dest_filename = os.path.join(
                    image_destination_dir,
                    stem + f".record.{set_number}.{normalize}.npy",
                )
            else:
                image_dest_filename = os.path.join(
                    image_destination_dir, stem + f".cnn.{normalize}.npy")
                wcs_dest_filename = os.path.join(
                    image_destination_dir, stem + f".cnn.{normalize}.wcs.npy")

            if not os.path.exists(image_dest_filename):
                image, wcs = _build_cnn_image(image_name, resize, normalize)
                np.save(image_dest_filename,
                        image)  # Save to the final destination
                np.save(wcs_dest_filename, wcs)
            else:
                image = np.load(image_dest_filename)
                wcs = np.load(wcs_dest_filename, allow_pickle=True)

            # Get image dimensions and insert them in a python dict
            height, width, depth = np.shape(image)
            record = {
                "file_name": image_dest_filename,
                "image_id": i,
                "height": height,
                "width": width,
                "depth": depth,
            }
            if not os.path.exists(record_dest_filename):
                source = vac_catalog[vac_catalog["Source_Name"] == stem]
                # Search radius: 1.5 times the source size, but never less
                # than 30; both divided by 3600 (presumably arcseconds to
                # degrees - confirm against the helper).
                (
                    objects,
                    distances,
                    angles,
                    source_coords,
                    sky_coords,
                ) = determine_visible_catalogue_source_and_separation(
                    source["RA"],
                    source["DEC"],
                    np.max([source[size_name] * 1.5 / 3600.0, 30.0 / 3600.0]),
                    pan_wise_catalog,
                )
                # Sort from closest to farthest distance
                idx = np.argsort(distances)
                objects = objects[idx]
                distances = distances[idx]
                angles = angles[idx]
                sky_coords = sky_coords[idx]
                optical_sources, optical_labels = _describe_optical_sources(
                    objects, distances, angles, source, bands, normalize)
                record["optical_sources"] = optical_sources
                record["optical_labels"] = optical_labels
                record["source_skycoord"] = source_coords
                record["optical_skycoords"] = sky_coords
                record["wcs"] = wcs
                if rotation is None:
                    record["rotation"] = 0.0
                elif np.ndim(rotation) == 0:
                    # A single angle applies to every set
                    record["rotation"] = rotation
                else:
                    record["rotation"] = rotation[set_number]
                np.save(record_dest_filename, record)
            else:
                record = np.load(record_dest_filename,
                                 fix_imports=True,
                                 allow_pickle=True)
                # np.save wraps a dict in a 0-d object array; unwrap it so that
                # cached and freshly built records have the same type.
                if isinstance(record, np.ndarray) and record.ndim == 0:
                    record = record.item()
            record_list.append(record)