示例#1
0
from os import listdir
from os.path import isfile, join

import matplotlib.pyplot as plt
import numpy as np
from astropy.coordinates import SkyCoord
from lofarnn.data.cutouts import convert_to_valid_color
from astropy.io import fits
from astropy.table import Table
from lofarnn.models.dataloaders.utils import get_lotss_objects
from astropy import units as u
from radio_beam import Beam
from astropy.wcs import WCS
from astropy.wcs.utils import skycoord_to_pixel, proj_plane_pixel_scales
# Load the catalogue through the project loader and print a quick summary of
# its identifier columns.
lobjects = get_lotss_objects(
    "/home/bieker/Downloads/LOFAR_HBA_T1_DR1_merge_ID_optical_f_v1.2_restframe.fits"
)
lobjects.info()

obj_ids = lobjects["objID"].data
print(f"Min/Max: ObjID: {np.min(obj_ids)}/{np.max(obj_ids)}")
# NOTE: despite the "Min/Max" label this prints the whole AllWISE column.
allwise_ids = lobjects["AllWISE"].data
print(f"Min/Max: PS: {allwise_ids}")

# The component name from the component catalogue is used to select the
# matching Gaussian, so both tables are read into pandas DataFrames.
gauss_catalog = "/home/bieker/Downloads/LOFAR_HBA_T1_DR1_catalog_v0.99.gaus.fits"
component_catalog = "/home/bieker/Downloads/LOFAR_HBA_T1_DR1_merge_ID_v1.2.comp.fits"
gauss_table = Table.read(gauss_catalog)
gauss_cat = gauss_table.to_pandas()
# ``component_catalog`` is rebound from the file path to the loaded DataFrame.
component_table = Table.read(component_catalog)
component_catalog = component_table.to_pandas()
"""
示例#2
0
import matplotlib.pyplot as plt
import numpy as np

from lofarnn.models.dataloaders.utils import get_lotss_objects

# Plot a histogram and print summary statistics for three magnitude columns of
# the combined catalogue, then stop the script.
dat = get_lotss_objects("/home/s2153246/data/dr2_combined.fits")

for col in ["MAG_R", "MAG_W1", "MAG_W2"]:
    # pyplot keeps drawing on the current figure until it is cleared or
    # closed, so without a fresh figure every saved PNG would also contain the
    # histograms of the previously processed columns.
    plt.figure()
    plt.hist(
        np.nan_to_num(dat[col]),
        # 0.5-wide bins spanning the 2nd to the 98th percentile of the
        # non-NaN values; samples outside that range are not drawn.
        bins=np.arange(np.nanpercentile(dat[col], 2),
                       np.nanpercentile(dat[col], 98), 0.5),
    )
    plt.title(col)
    plt.savefig(f"Legacy_{col}.png", dpi=300)
    # Release the figure so memory does not grow with the number of columns.
    plt.close()
    print(
        f"{col}: Max: {np.nanmax(dat[col])}, Min: {np.nanmin(dat[col])}, Median: {np.nanmedian(dat[col])}, "
        f"1st Percentile: {np.nanpercentile(dat[col], 1)}, 5th Percentile: {np.nanpercentile(dat[col], 5)}, 95th Percentile: {np.nanpercentile(dat[col], 95)}, 99th Percentile: {np.nanpercentile(dat[col], 99)}"
    )
exit()
示例#3
0
def create_cnn_dataset(
    root_directory: str,
    counterpart_catalog: str = "",
    split_fraction: float = 0.2,
    resize: Optional[Union[Tuple[int], int]] = None,
    rotation: Optional[Union[List[float], float]] = None,
    convert: bool = True,
    bands: List[str] = (
        "iFApMag",
        "w1Mag",
        "gFApMag",
        "rFApMag",
        "zFApMag",
        "yFApMag",
        "w2Mag",
        "w3Mag",
        "w4Mag",
    ),
    vac_catalog: str = "",
    normalize: bool = True,
    subset: str = "",
    multi_rotate_only: Optional[Union[List[str], str]] = None,
    verbose: bool = False,
    **kwargs,
):
    """
    Build the COCO-style directory tree under ``root_directory``, split the
    images found in its "all" directory, and write one CNN annotation file per
    split through ``create_cnn_annotations``.

    Annotation files are written in this order: validation (only when the
    validation split is non-empty), un-rotated training ("train_test"), test,
    and finally the training set with ``rotation`` applied.

    :param root_directory: root directory for the COCO dataset
    :param counterpart_catalog: forwarded to ``create_cnn_annotations`` as ``pan_wise_location``
    :param split_fraction: fraction handed to ``split_data`` as both the validation and the test split
    :param resize: Image size to resize to, or None if not resizing
    :param rotation: rotation setting, forwarded only to the final training-set call; all other calls use None
    :param convert: Whether to convert npy files to png, or to keep them in the original format
    :param bands: The bands to include in the source
    :param vac_catalog: forwarded to ``create_cnn_annotations`` as ``vac_catalog_location``
    :param normalize: forwarded to ``create_cnn_annotations``; also part of every annotation file name
    :param subset: if not empty, path to a saved array of source file paths; every split is restricted to it
        and annotations go to a "subset" sub-directory
    :param multi_rotate_only: if set, catalogue passed to ``get_lotss_objects``; the names of sources whose
        association column is greater than 1 are forwarded as ``rotation_names``
    :param verbose: Whether to print more data to stdout or not
    :param kwargs: ``association_name`` selects the association column (default "LGZ_Assoc")
    :return:
    """
    directories = create_coco_style_directory_structure(root_directory,
                                                        verbose=verbose)
    (
        all_directory,
        train_directory,
        val_directory,
        test_directory,
        annotations_directory,
    ) = directories

    # Split everything found in the "all" directory
    data_split = split_data(all_directory,
                            val_split=split_fraction,
                            test_split=split_fraction)

    if subset:
        # Drop every entry that is not listed in the subset file
        subset = np.load(subset, allow_pickle=True)
        for split_name in ("train", "test", "val"):
            members = data_split[split_name]
            data_split[split_name] = members[np.isin(members, subset)]
        annotations_directory = os.path.join(annotations_directory, "subset")

    multi_names = None
    if multi_rotate_only:
        l_objects = get_lotss_objects(multi_rotate_only, False)
        association_column = kwargs.get("association_name", "LGZ_Assoc")
        # Keep the multicomponent sources only
        multi_component = l_objects[l_objects[association_column] > 1]
        multi_names = multi_component["Source_Name"].data

    # Keyword arguments shared by every annotation call
    shared_options = dict(
        json_dir=annotations_directory,
        pan_wise_location=counterpart_catalog,
        resize=resize,
        convert=convert,
        normalize=normalize,
        bands=bands,
        vac_catalog_location=vac_catalog,
        rotation_names=multi_names,
        verbose=verbose,
    )

    # (split name, image destination, annotation file name, rotation)
    jobs = []
    if len(data_split["val"]) > 0:
        jobs.append(("val", val_directory,
                     f"cnn_val_norm{normalize}_extra.pkl", None))
    jobs.append(("train", train_directory,
                 f"cnn_train_test_norm{normalize}_extra.pkl", None))
    jobs.append(("test", test_directory,
                 f"cnn_test_norm{normalize}_extra.pkl", None))
    jobs.append(("train", train_directory,
                 f"cnn_train_norm{normalize}_extra.pkl", rotation))

    for split_name, destination, json_name, split_rotation in jobs:
        create_cnn_annotations(
            data_split[split_name],
            image_destination_dir=destination,
            json_name=json_name,
            rotation=split_rotation,
            **shared_options,
        )
示例#4
0
def create_source_dataset(
    cutout_directory: str,
    pan_wise_location: str,
    value_added_catalog_location: str,
    component_catalog_location: str,
    dr_two_location: str,
    bands: List[str] = (
        "iFApMag",
        "w1Mag",
        "gFApMag",
        "rFApMag",
        "zFApMag",
        "yFApMag",
        "w2Mag",
        "w3Mag",
        "w4Mag",
    ),
    fixed_size: Optional[Union[int, float]] = None,
    filter_lgz: bool = True,
    verbose: bool = False,
    use_multiprocessing: bool = False,
    strict_filter: bool = False,
    filter_optical: bool = True,
    no_source: bool = False,
    num_threads: Optional[int] = os.cpu_count(),
    **kwargs,
):
    """
    Filter the value-added catalogue and call ``create_cutouts`` once for every
    mosaic that still contains sources after filtering.

    The number of remaining sources is printed after each filtering step.

    :param cutout_directory: Directory to store the cutouts
    :param pan_wise_location: The location of the PanSTARRS-ALLWISE catalog
    :param value_added_catalog_location: Location of the LoTSS Value Added Catalog
    :param component_catalog_location: Location of the component catalog, loaded with ``get_lotss_objects``
    :param dr_two_location: The location of the LoTSS DR2 Mosaic Locations
    :param bands: Bands forwarded to ``create_cutouts``
    :param fixed_size: Forwarded to ``create_cutouts`` as the source size; ``False`` is treated as ``None``
    :param filter_lgz: Whether to filter on LGZ_Size
    :param verbose: Forwarded to ``create_cutouts``
    :param use_multiprocessing: Whether to use multiprocessing
    :param strict_filter: Use the same filtering as for Jelle's subsample, with total flux > 10 mJy, and size > 15 arcseconds
    :param filter_optical: Whether to filter out sources with only optical sources or not
    :param no_source: With ``filter_optical``, keep only the sources whose optical position is NaN instead of dropping them
    :param num_threads: Number of threads to use, if multiprocessing is true
    :param kwargs: ``size_name``, ``optical_ra`` and ``optical_dec`` override the catalogue column names
        used for filtering; all keyword arguments are also forwarded to ``create_cutouts``
    :return:
    """
    l_objects = get_lotss_objects(value_added_catalog_location, False)
    print(len(l_objects))
    size_name = kwargs.get("size_name", "LGZ_Size")
    optical_ra = kwargs.get("optical_ra", "ID_ra")
    optical_dec = kwargs.get("optical_dec", "ID_dec")

    if filter_lgz:
        l_objects = l_objects[~np.isnan(l_objects[size_name])]
        print(len(l_objects))
    if filter_optical:
        if no_source:
            l_objects = l_objects[np.isnan(l_objects[optical_ra])]
            l_objects = l_objects[np.isnan(l_objects[optical_dec])]
        else:
            l_objects = l_objects[~np.isnan(l_objects[optical_ra])]
            l_objects = l_objects[~np.isnan(l_objects[optical_dec])]
        print(len(l_objects))
    if strict_filter:
        l_objects = l_objects[l_objects[size_name] > 15.0]
        l_objects = l_objects[l_objects["Total_flux"] > 10.0]
        print(len(l_objects))
    mosaic_names = set(l_objects["Mosaic_ID"])
    print(len(l_objects))
    print(mosaic_names)
    comp_catalog = get_lotss_objects(component_catalog_location, False)

    # Create the detectron2-style directory structure; only the "all"
    # directory is written to here.
    (
        all_directory,
        train_directory,
        val_directory,
        test_directory,
        annotations_directory,
    ) = create_coco_style_directory_structure(cutout_directory)

    if fixed_size is False:
        fixed_size = None

    if use_multiprocessing:
        from functools import partial

        # ``itertools.repeat`` cannot carry keyword arguments (the previous
        # ``repeat(**kwargs)`` raised TypeError), so bind them to the worker.
        worker = partial(create_cutouts, **kwargs)
        # Positional order presumably mirrors create_cutouts' signature, as in
        # the keyword call of the serial branch -- verify against its definition.
        job_arguments = zip(
            mosaic_names,
            repeat(l_objects),
            repeat(pan_wise_location),
            repeat(comp_catalog),
            repeat(dr_two_location),
            repeat(all_directory),
            repeat(bands),
            repeat(fixed_size),
            repeat(verbose),
        )
        # starmap blocks until every job is done; the context manager then
        # shuts the worker processes down instead of leaking them.
        with multiprocessing.Pool(num_threads) as pool:
            pool.starmap(worker, job_arguments)
    else:
        for mosaic in mosaic_names:
            create_cutouts(
                mosaic=mosaic,
                value_added_catalog=l_objects,
                pan_wise_catalog=pan_wise_location,
                component_catalog=comp_catalog,
                mosaic_location=dr_two_location,
                save_cutout_directory=all_directory,
                bands=bands,
                source_size=fixed_size,
                verbose=verbose,
                **kwargs,
            )
示例#5
0
def _cnn_destination_paths(image_destination_dir, stem, set_number, normalize,
                           convert, has_rotation):
    """Return the (image, wcs, record) destination paths for one source."""

    def in_destination(suffix):
        return os.path.join(image_destination_dir, stem + suffix)

    record_path = in_destination(f".record.{normalize}.npy")
    if convert:
        image_path = in_destination(f".cnn.{set_number}.png")
        # This branch previously left the WCS path undefined, which raised a
        # NameError as soon as the WCS was saved.
        wcs_path = in_destination(f".cnn.{set_number}.wcs.npy")
    elif has_rotation:
        # Rotated sets get one image/WCS/record per set number.
        image_path = in_destination(f".cnn.{set_number}.{normalize}.npy")
        wcs_path = in_destination(f".cnn.{set_number}.{normalize}.wcs.npy")
        record_path = in_destination(f".record.{set_number}.{normalize}.npy")
    else:
        image_path = in_destination(f".cnn.{normalize}.npy")
        wcs_path = in_destination(f".cnn.{normalize}.wcs.npy")
    return image_path, wcs_path, record_path


def _build_radio_cnn_image(image_name, resize, normalize):
    """Load one saved cutout and return (3-channel radio image, cutout WCS)."""
    # SECURITY NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # cutout files that were written by this pipeline.
    (image, cutouts, proposal_boxes, wcs) = np.load(image_name,
                                                    allow_pickle=True)
    print(image.shape)
    image = np.moveaxis(image, 0, 2)  # first axis becomes the last axis
    # Full-size cutout of the first channel; only its WCS is kept.
    cutout = Cutout2D(
        image[:, :, 0],
        position=(int(image.shape[0] / 2), int(image.shape[1] / 2)),
        size=(int(image.shape[0]), int(image.shape[1])),
        wcs=wcs,
    )
    wcs = cutout.wcs
    image = np.nan_to_num(image)
    # Angle 0: the image is only resized (when ``resize`` is given).
    image, cutouts, proposal_boxes = augment_image_and_bboxes(
        image,
        cutouts=cutouts,
        proposal_boxes=proposal_boxes,
        angle=0,
        new_size=resize,
        verbose=False,
    )

    # Only the first (radio) channel is used. The copies are taken before any
    # conversion so each variant starts from the same pixel values.
    radio_sqrt = image[:, :, 0]
    radio_clip = np.copy(radio_sqrt)
    radio_raw = np.copy(radio_sqrt)
    radio_sqrt = convert_to_valid_color(
        radio_sqrt,
        clip=True,
        lower_clip=0.0,
        upper_clip=1000,
        normalize=normalize,
        scaling="sqrt",
    )
    radio_clip = convert_to_valid_color(
        radio_clip,
        clip=True,
        lower_clip=0.0,
        upper_clip=1000,
        normalize=normalize,
        scaling=None,
    )
    radio_raw = convert_to_valid_color(
        radio_raw,
        clip=False,
        normalize=False,
        scaling="sqrt",
    )
    # Convert back from masked arrays to normal arrays, then restack the three
    # variants into a 3-channel image.
    radio_sqrt = np.ma.filled(radio_sqrt, fill_value=0.0)
    radio_clip = np.ma.filled(radio_clip, fill_value=0.0)
    radio_raw = np.ma.filled(radio_raw, fill_value=0.0)
    stacked = np.dstack((radio_sqrt, radio_clip, radio_raw))
    return np.nan_to_num(stacked), wcs


def make_single_cnn_set(
    image_names: List[Path],
    record_list: List[Any],
    set_number: int,
    image_destination_dir: Optional[str],
    pan_wise_location: str = "",
    bands: List[str] = (
        "iFApMag",
        "w1Mag",
        "gFApMag",
        "rFApMag",
        "zFApMag",
        "yFApMag",
        "w2Mag",
        "w3Mag",
        "w4Mag",
    ),
    resize: Optional[Union[int, List[int]]] = None,
    rotation: Optional[Union[List[float], float]] = None,
    convert: bool = False,
    vac_catalog_location: str = "",
    normalize: bool = True,
    **kwargs,
):
    """
    Create (or reload) the CNN image and the record of every cutout in
    ``image_names`` and append each record to ``record_list``.

    For each cutout a 3-channel image built from the first (radio) channel and
    its WCS are saved in ``image_destination_dir``; existing files are reloaded
    instead of being rebuilt. The record holds the image metadata plus, for
    every catalogue object returned by
    ``determine_visible_catalogue_source_and_separation``, its identifiers,
    position, distance, angle, ``z_best`` and band values, together with a 0/1
    label that marks the object whose ``objID`` and ``AllWISE`` match the source.

    :param image_names: Paths of the saved cutouts; the stem is used as the source name
    :param record_list: List that receives one record per image (modified in place)
    :param set_number: Index of this set; used in file names and to pick the rotation
    :param image_destination_dir: Directory for the images, WCS files and records
    :param pan_wise_location: FITS catalogue opened here; its first extension is searched for nearby objects
    :param bands: Catalogue columns appended to every optical source entry
    :param resize: Forwarded to ``augment_image_and_bboxes`` as ``new_size``
    :param rotation: Rotation(s) of the sets; list, array or scalar. Only selects file names and
        the value stored in the record, the image itself is not rotated here
    :param convert: Use a ``.png`` image file name instead of ``.npy``
    :param vac_catalog_location: Catalogue loaded with ``get_lotss_objects`` to look up each source
    :param normalize: Normalize the image and scale band values from [10, 28] to [0, 1]
    :param kwargs: ``size_name`` selects the source-size column (default "LGZ_Size")
    :return: None; records are appended to ``record_list``
    """
    # ``rotation`` may be a plain list or a scalar, which have no ``.any()``.
    has_rotation = rotation is not None and bool(np.any(rotation))
    size_name = kwargs.get("size_name", "LGZ_Size")

    # Keep the catalogue file open only for the duration of the loop.
    with fits.open(pan_wise_location, memmap=True) as pan_wise_hdus:
        pan_wise_catalog = pan_wise_hdus[1].data
        vac_catalog = get_lotss_objects(vac_catalog_location, verbose=False)
        for i, image_name in enumerate(image_names):
            (
                image_dest_filename,
                wcs_dest_filename,
                record_dest_filename,
            ) = _cnn_destination_paths(
                image_destination_dir,
                image_name.stem,
                set_number,
                normalize,
                convert,
                has_rotation,
            )

            # NOTE(review): with convert=True the image name ends in ".png",
            # but np.save appends ".npy", so this check never finds the file
            # and no real PNG is written -- confirm the intended writer.
            if not os.path.exists(image_dest_filename):
                image, wcs = _build_radio_cnn_image(image_name, resize,
                                                    normalize)
                np.save(image_dest_filename,
                        image)  # Save to the final destination
                np.save(wcs_dest_filename, wcs)
            else:
                image = np.load(image_dest_filename)
                wcs = np.load(wcs_dest_filename, allow_pickle=True)
            # Always describe the array that is on disk, so a fresh and a
            # cached image yield the same height/width/depth.
            height, width, depth = np.shape(image)

            record = {
                "file_name": image_dest_filename,
                "image_id": i,
                "height": height,
                "width": width,
                "depth": depth,
            }
            if not os.path.exists(record_dest_filename):
                source = vac_catalog[vac_catalog["Source_Name"] ==
                                     image_name.stem]
                # Search radius: 1.5 x the source size with a floor of 30,
                # both divided by 3600 (presumably arcseconds -> degrees).
                (
                    objects,
                    distances,
                    angles,
                    source_coords,
                    sky_coords,
                ) = determine_visible_catalogue_source_and_separation(
                    source["RA"],
                    source["DEC"],
                    np.max([
                        source[size_name] * 1.5 / 3600.0,
                        30. / 3600.
                    ]),
                    pan_wise_catalog,
                )
                # Sort from closest to farthest distance
                idx = np.argsort(distances)
                objects = objects[idx]
                distances = distances[idx]
                angles = angles[idx]
                sky_coords = sky_coords[idx]
                optical_sources = []
                optical_labels = []
                for j, obj in enumerate(objects):
                    # 999999 == '' in source for AllWISE
                    print(
                        f"Object: {obj['objID']} Source: {source['objID'].data[0]} \n {obj['AllWISE']} {source['AllWISE'].data[0]}"
                    )
                    is_counterpart = (
                        obj["objID"] == source["objID"].data[0]
                        and obj["AllWISE"] == source["AllWISE"].data[0])
                    optical_labels.append(1 if is_counterpart else 0)
                    features = [
                        obj["objID"],
                        obj["AllWISE"],
                        obj["ra"],
                        obj["dec"],
                        distances[j],
                        angles[j],
                        obj["z_best"],
                    ]
                    for layer in bands:
                        value = np.nan_to_num(obj[layer])
                        if normalize:  # Scale to between 0 and 1 for 10 to 28 magnitude
                            value = np.clip(value, 10.0, 28.0)
                            value = (value - 10.0) / (28.0 - 10.0)
                        features.append(value)
                    optical_sources.append(features)
                record["optical_sources"] = optical_sources
                record["optical_labels"] = optical_labels
                record["source_skycoord"] = source_coords
                record["optical_skycoords"] = sky_coords
                record["wcs"] = wcs
                if rotation is None:
                    record["rotation"] = 0.0
                elif np.ndim(rotation) == 0:
                    # A single scalar rotation applies to every set.
                    record["rotation"] = rotation
                else:
                    record["rotation"] = rotation[set_number]
                np.save(record_dest_filename, record)
            else:
                # np.save stores the dict inside a 0-d object array; unwrap it
                # so cached and freshly built records are both plain dicts.
                record = np.load(record_dest_filename,
                                 fix_imports=True,
                                 allow_pickle=True).item()

            # Now add the labels, so need to know which optical source is the true one
            record_list.append(record)