Example #1
def convert(path_datacube, cubename, path_cutouts, frac_true):
    """Convert simulated data before starting training"""

    outdir = os.path.join(path_datacube, "datacube")
    mkdir_p(outdir)

    # Get all the true and false candidate cutouts
    truelist = glob.glob(os.path.join(path_cutouts, "true", "*.fits"))
    falselist = glob.glob(os.path.join(path_cutouts, "false", "*.fits"))
    # output cube name
    npz_name = "%s.npz" % cubename
    Ncand_true = len(truelist)
    Ncand_false = len(falselist)
    # Balance the dataset: cap the counts so that true and false cutouts
    # are mixed in the requested proportion frac_true.
    if Ncand_true > Ncand_false:
        Ncand_true_max = floor(2 * Ncand_false * frac_true)
        Ncand_false_max = floor(2 * Ncand_false * (1 - frac_true))
    else:
        Ncand_true_max = floor(2 * Ncand_true * frac_true)
        Ncand_false_max = floor(2 * Ncand_true * (1 - frac_true))

    Ncand_tot = len(truelist) + len(falselist)
    Ncand = Ncand_true_max + Ncand_false_max
    cube = []  # np.zeros((Ncand, 64, 64))
    labels = []
    mags = []
    errmags = []
    cand_ids = []
    filters = []
    counter_true = 0
    for cand in truelist:
        if counter_true >= Ncand_true_max:
            break
        hdus = fits.open(cand, memmap=False)
        head = hdus[0].header
        # Exclude cases too close to the edge,
        # i.e. located at less than the defined size
        # of the small images
        if head["EDGE"] == "False":
            labels += [1]
            mags += [head["MAG"]]
            errmags += [head["MAGERR"]]
            filters += [head["FILTER"]]
            cand_ids += [head["CANDID"]]
            cube.append(hdus[0].data)
        hdus.close()
        counter_true += 1

    counter_false = 0
    for cand in falselist:
        if counter_false >= Ncand_false_max:
            break
        hdus = fits.open(cand, memmap=False)
        head = hdus[0].header
        # if hdus[0].data.shape != (64, 64):
        #    print ('skip %s as its shape is not (64,64): (%d,%d)'
        #           % (cand, hdus[0].data.shape[0], hdus[0].data.shape[1]))
        # Exclude cases too close to the edge,
        # i.e. located at less than the defined size
        # of the small images
        if head["EDGE"] == "False":
            labels += [0]
            mags += [head["MAG"]]
            errmags += [head["MAGERR"]]
            filters += [head["FILTER"]]
            cand_ids += [head["CANDID"]]
            cube.append(hdus[0].data)
        hdus.close()
        counter_false += 1

    print("The datacube contains",
          str(Ncand),
          "candidates with Ntrue =",
          str(counter_true),
          "and Nfalse =",
          str(counter_false))
    print("Converting and reshaping arrays ...")
    # Convert lists to B.I.P. NumPy arrays.
    # Check whether all candidates have 64x64 pixels;
    # if not, delete them. This can happen at the edge of images.
    # for i in range(len(cube)):
    #    if np.array(cube[i]).shape != (64, 64):
    #        print (i, np.array(cube[i]).shape)
    #        del cube[i]
    cube = np.asarray(cube, dtype=np.float32)
    if cube.ndim < 4:
        cube = np.reshape(
            cube, [
                cube.shape[0], cube.shape[1], cube.shape[2], 1])
    else:
        cube = np.moveaxis(cube, 1, -1)

    # Report dimensions of the data cube
    print("Saving %d %d×%d×%d image datacube ..." %
          cube.shape, end="\r", flush=True)
    np.savez(
        os.path.join(outdir, npz_name),
        cube=cube,
        labels=labels,
        mags=mags,
        errmags=errmags,
        filters=filters,
        candids=cand_ids
    )

    print("Saved to " + os.path.join(outdir, npz_name))
Example #2
def makestats(path, radius=2):
    """ Create some statistics on the simulated events """
    mkdir_p(os.path.join(path, 'CheckSim'))
    candidates_list = getCandPos(path)

    # Load the file crossmatching the detected candidates with the
    # simulated events. Load it if it already exists,
    # otherwise create it.
    try:
        crossmatch = ascii.read(os.path.join(path, "crossmatch.dat"))
    except FileNotFoundError:
        crossmatch = crossmatch_detections(path,
                                           candidates_list,
                                           radius=radius)

    # mask_det = crossmatch["Nmatches"] == 1
    # Actually take everything with a match.
    mask_det = crossmatch["Nmatches"] > 0

    bands = crossmatch.group_by("filter").groups.keys

    # plot histogram of simulated source magnitudes
    plt.figure()
    n1, bins1, patches = plt.hist(
        crossmatch["mag"],
        30,
        facecolor="C0",
        alpha=0.75,
        density=False,
        stacked=False,
        label="Sim",
    )
    n2, bins2, patches = plt.hist(
        crossmatch["mag"][mask_det],
        bins1,
        facecolor="C1",
        alpha=0.5,
        density=False,
        stacked=False,
        label="Det",
    )
    plt.xlabel("mag")
    plt.ylabel("N")
    plt.title("All filters")
    plt.grid(True)
    plt.legend()
    plt.savefig(os.path.join(path, "CheckSim/sim_mag_distrib_allbands.png"))

    # plot the fraction of detections in each magnitude bin
    plt.figure()
    x = (bins1[1:] + bins1[:-1]) / 2
    # bins with n1 == 0 yield NaN and are simply not drawn
    plt.plot(x, n2 / n1)
    plt.xlabel('Simulated magnitude')
    plt.ylabel('Detected fraction')
    plt.title('Fraction of simulated events detected.')
    plt.savefig(os.path.join(path, 'CheckSim/detection_fraction_allbands.png'))

    for band in bands:
        mask_band = crossmatch["filter"] == band[0]
        mask = mask_det & mask_band
        plt.figure()
        n1, bins1, patches = plt.hist(
            crossmatch["mag"][mask_band],
            30,
            facecolor="C0",
            alpha=0.75,
            density=False,
            stacked=False,
            label="Sim",
        )
        n2, bins2, patches = plt.hist(
            crossmatch["mag"][mask],
            bins1,
            facecolor="C1",
            alpha=0.5,
            density=False,
            stacked=False,
            label="Det",
        )
        plt.xlabel("mag")
        plt.ylabel("N")
        plt.title("%s band" % band[0])
        plt.grid(True)
        plt.legend()
        plt.savefig(
            os.path.join(path, "CheckSim/sim_mag_distrib_%s.png" % band[0]))
Example #3
def filter_candidates(sources,
                      FWHM_ratio_lower=0.5,
                      FWHM_ratio_upper=5.0,
                      CNN_model=None,
                      CNN_thres=0.0,
                      makecutout=True,
                      size=100,
                      size_cnn=32,
                      fmt='png',
                      outLevel=1,
                      nb_threads=8,
                      combined=False):
    """Filter transient candidates"""
    print('Filter candidates')
    # Take the first candidate to extract the path where to store the files.
    # No need to check whether subtraction was performed: if it was,
    # only sources from subtracted files have 'Match' == 'Y'.
    path, fname_ext = os.path.split(sources['filenames'][0])
    # Get rid of the extension to keep only the name
    fname2, extension = os.path.splitext(fname_ext)
    # Get rid of the '_ref' pattern
    fname2 = fname2.split('_ref')[0]

    # First get the sources not crossmatching with sources in catalogs
    mask_cat = sources['Match'] == 'N'

    # Remove candidates on the edges
    mask_edge = sources["edge"] == 'N'

    # Remove sources with FWHM ratio outside the desired range
    FWHM_ratio = sources["FWHM"] / sources["FWHMPSF"]
    mask_FWHM = (FWHM_ratio >= FWHM_ratio_lower) & \
                (FWHM_ratio <= FWHM_ratio_upper)

    mask_tot = mask_cat & mask_edge & mask_FWHM
    # Use a trained CNN model to filter candidates.
    if CNN_model is not None:
        print('Create fits cutouts for CNN')
        # Create fits cutouts to be given to the CNN model
        path_CNN_cutouts = os.path.join(path, 'CNN_cutouts')
        mkdir_p(path_CNN_cutouts)
        args_data = []
        outnames = []
        info_dicts = []
        for cand in sources:
            coords = [cand['_RAJ2000'], cand['_DEJ2000']]
            outname = os.path.join(path_CNN_cutouts,
                                   'candidate_%d.fits' % (cand['idx']))
            info_dict = {}
            info_dict['RA'] = cand['_RAJ2000']
            info_dict['DEC'] = cand['_DEJ2000']
            info_dict['XPOS'] = cand['Xpos']
            info_dict['YPOS'] = cand['Ypos']
            info_dict['FILE'] = cand['filenames']
            info_dict['CANDID'] = cand['idx']
            info_dict['MAG'] = cand['mag_calib']
            info_dict['MAGERR'] = cand['mag_calib_err']
            info_dict['FWHM'] = cand['FWHM']
            info_dict['FWHMPSF'] = cand['FWHMPSF']

            args_data.append([
                cand['filenames'],
                coords,
                "world",
                [size_cnn, size_cnn],
                -1,
            ])
            outnames.append(outname)
            info_dicts.append(info_dict)
        """
        # Run make_sub_image asynchronously in parallel
        # using many processes.
        # Need to split the args list into the number of required threads.
        # Maybe there is another way?
        N_sources = len(sources)
        # Check whether there is less data than the number of threads.
        if N_sources < nb_threads:
            nb_threads = 1
        Ncut = int(N_sources / nb_threads)
        args_threads = []
        idx_stop = []
        for i in range(nb_threads):
            if i == 0:
                args_threads.append(args[i * Ncut : (i+1) * Ncut])
                idx_stop.append((i+1) * Ncut)
            elif i > 0 and i < nb_threads-1:
                args_threads.append(args[i * Ncut : (i+1) * Ncut])
                idx_stop.append((i+1) * Ncut)
            elif i == nb_threads-1:
                args_threads.append(args[i * Ncut : N_sources])
                idx_stop.append(N_sources)
            if idx_stop[-1] >= N_sources:
                break
        args_threads = np.array(args_threads)
        """
        args_data = np.array(args_data)
        pool = mp.Pool(nb_threads)
        # call apply_async() without callback
        """
        result_objects = [pool.apply_async(make_sub_image,
            args=(j[0,:], j[1,:], j[2,:], j[3,:], j[4,:]))
            for j in args_threads]
        """
        result_objects = [
            pool.apply_async(
                make_sub_image,
                args=(args_data[:, 0], args_data[:, 1], args_data[:, 2],
                      args_data[:, 3], args_data[:, 4]))
        ]

        # result_objects is a list of pool.ApplyResult objects
        results = [r.get() for r in result_objects]

        # Don't forget to close
        pool.close()
        pool.join()
        results = np.array(results[0])
        # Create fits cutouts
        p = mp.Pool(nb_threads)
        args = [[a, b, c, d, e, f]
                for a, b, c, d, e, f in zip(results[0, :], outnames,
                                            results[1, :], results[2, :],
                                            results[3, :], info_dicts)]
        p.starmap(make_fits, args)
        p.close()

        # Run CNN model to associate a probability to each cutout
        # The size of the cutout should be the same as the ones used
        # for the CNN training
        print("Use trained CNN model")
        infer(path_CNN_cutouts, CNN_model, 0.1)

        # Add the probability to the candidates table.
        infer_table = ascii.read(
            os.path.join(path_CNN_cutouts, 'infer_results.dat'))
        sources = join(sources,
                       infer_table['idx', 'label0', 'label1'],
                       join_type='left')

        # keep only transients that are above the threshold
        mask_CNN = sources['label1'] >= CNN_thres
        mask_tot = mask_tot & mask_CNN

    # Write output file.
    candidates = sources[mask_tot]
    # Create ID to start from 1.
    candidates['cand_ID'] = np.arange(len(candidates)) + 1
    # Rename columns
    if 'label0' in candidates.colnames:
        candidates.rename_column('label0', 'P_False')
        candidates.rename_column('label1', 'P_True')
    candidates.write(os.path.join(path, fname2 + '_candidates.dat'),
                     format='ascii.commented_header',
                     overwrite=True)

    # Update
    print('Make cutouts')
    # Extract small image centered on candidates passing the filters
    if makecutout:
        path_cutout = os.path.join(path, 'cutouts')
        mkdir_p(path_cutout)
        args_data = []
        outnames = []
        titles = []
        if combined:
            args_combined = []
            path_cutout_combined = os.path.join(path_cutout, 'combined')
            mkdir_p(path_cutout_combined)
        for cand in candidates:
            coords = [cand['_RAJ2000'], cand['_DEJ2000']]
            outname = os.path.join(path_cutout,
                                   'candidate_%d.%s' % (cand['cand_ID'], fmt))
            if combined:
                outname_combined = os.path.join(
                    path_cutout_combined,
                    'candidate_%d_comb.%s' % (cand['cand_ID'], 'png'))

            header = fits.getheader(cand['OriginalIma'])
            try:
                date = Time(header["DATE-OBS"], format="fits")
                # convert to Julian date if needed
                # date_JD = date.jd
            except (KeyError, ValueError):
                date = Time(header["MJD-OBS"], format="mjd")
            date.format = 'iso'
            if fmt != 'fits' or combined:
                _coords = SkyCoord(cand['_RAJ2000'],
                                   cand['_DEJ2000'],
                                   unit=(u.degree, u.degree),
                                   frame='icrs')
                coords_sexa = _coords.to_string(style='hmsdms')
                title = "RA Dec: %s \n" % coords_sexa + \
                        "Time (UTC): %s \n" % (date.value) + \
                        "Mag: %.2f +/- %.2f     " % (cand['mag_calib'],
                                                     cand['mag_calib_err']) + \
                        "     FWHM_ratio: %.2f" % (
                            cand['FWHM']/cand['FWHMPSF'])
                if CNN_model is not None:
                    title += "     CNN proba: %.2f " % cand['P_True']
                titles.append(title)

            args_data.append([
                cand['filenames'],
                coords,
                "world",
                [size_cnn, size_cnn],
                -1,
            ])
            outnames.append(outname)

            if combined:
                args_combined.append(
                    [[cand['OriginalIma'], cand['RefIma'],
                      cand['filenames']], coords, "world", outname_combined,
                     [size, size], -1, title])

        # Create sub-array
        args_data = np.array(args_data)
        pool = mp.Pool(nb_threads)
        # call apply_async() without callback
        result_objects = [
            pool.apply_async(
                make_sub_image,
                args=(args_data[:, 0], args_data[:, 1], args_data[:, 2],
                      args_data[:, 3], args_data[:, 4]))
        ]

        # result_objects is a list of pool.ApplyResult objects
        results = [r.get() for r in result_objects]
        # Don't forget to close
        pool.close()
        pool.join()
        results = np.array(results[0])
        # Create fits cutouts
        p = mp.Pool(nb_threads)
        if fmt != 'fits':
            # Use the per-candidate titles collected above; zipping over a
            # single title string would iterate over its characters.
            args = [[a, b, c, d, e]
                    for a, b, c, d, e in zip(results[0, :], outnames,
                                             results[4, :],
                                             [fmt] * len(candidates), titles)]
            p.starmap(make_figure, args)
        elif fmt == 'fits':
            args = [[a, b, c, d, e, f]
                    for a, b, c, d, e, f in zip(results[0, :], outnames,
                                                results[1, :], results[2, :],
                                                results[3, :], info_dicts)]
            p.starmap(make_fits, args)
            p.starmap(make_fits, args)
        p.close()

        if combined:
            print('Make combined cutouts')
            p = mp.Pool(nb_threads)
            p.starmap(combine_cutouts, args_combined)
            p.close()
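
Example #4 below is a revised version of this function: it returns early when sources is empty, filters info_dicts with mask_tot before writing FITS cutouts, collects the per-candidate titles in a list, and delegates the multiprocessing to make_sub_image instead of driving the pool here.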
Example #4
def filter_candidates(
    sources,
    FWHM_ratio_lower=0.5,
    FWHM_ratio_upper=5.0,
    CNN_model=None,
    CNN_thres=0.0,
    makecutout=True,
    size=32,
    size_cnn=32,
    fmt="png",
    outLevel=1,
    nb_threads=8,
    combined=False,
):
    """Filter transient candidates"""

    # if no sources skip the following
    if len(sources) == 0:
        print("No candidates, no need to filter.")
        return 0

    print("Filter candidates")
    # Take first candidate to extract the path where to store the file
    # No need to chack if substraction was performed,
    # as if it did only the ones from substracted files are with 'Match' == Y
    path, fname_ext = os.path.split(sources["filenames"][0])
    # Get rid of the extension to keep only the name
    fname2, extension = os.path.splitext(fname_ext)
    # Get rid of the _reg pattern
    fname2 = fname2.split("_ref")[0]

    # First get the sources not crossmatching with sources in catalogs
    mask_cat = sources["Match"] == "N"

    # Remove candidates on the edges
    mask_edge = sources["edge"] == "N"

    # Remove sources with FWHM ratio outside the desired range
    FWHM_ratio = sources["FWHM"] / sources["FWHMPSF"]
    mask_FWHM = (FWHM_ratio >= FWHM_ratio_lower) & (FWHM_ratio <=
                                                    FWHM_ratio_upper)

    mask_tot = mask_cat & mask_edge & mask_FWHM
    # Create a dictionary with fits info for the cutouts to be given to the
    # CNN model, or simply for making fits cutouts
    if CNN_model is not None or fmt == "fits":

        if CNN_model is not None:
            path_CNN_cutouts = os.path.join(path, "CNN_cutouts")
            mkdir_p(path_CNN_cutouts)
            outnames = []
        args_data = []
        info_dicts = []
        for cand in sources:
            coords = [cand["_RAJ2000"], cand["_DEJ2000"]]
            if CNN_model is not None:
                outname = os.path.join(path_CNN_cutouts,
                                       "candidate_%d.fits" % (cand["idx"]))
                outnames.append(outname)
            info_dict = {}
            info_dict["RA"] = cand["_RAJ2000"]
            info_dict["DEC"] = cand["_DEJ2000"]
            info_dict["XPOS"] = cand["Xpos"]
            info_dict["YPOS"] = cand["Ypos"]
            info_dict["FILE"] = cand["filenames"]
            info_dict["CANDID"] = cand["idx"]
            info_dict["MAG"] = cand["mag_calib"]
            info_dict["MAGERR"] = cand["mag_calib_err"]
            info_dict["FWHM"] = cand["FWHM"]
            info_dict["FWHMPSF"] = cand["FWHMPSF"]

            args_data.append([
                cand["filenames"],
                coords,
                "world",
                [size_cnn, size_cnn],
                -1,
            ])

            info_dicts.append(info_dict)

    # Use a trained CNN model to filter candidates.
    if CNN_model is not None:
        print("Create fits cutouts for CNN")

        # Create sub-array
        args_data = np.array(args_data)
        make_sub_image(
            args_data[:, 0],
            outnames,
            args_data[:, 1],
            args_data[:, 2],
            args_data[:, 3],
            args_data[:, 4],
            info_dicts,
            [None] * len(outnames),
            [fmt] * len(outnames),
            nb_threads,
        )

        # The size of the cutout should be the same as the ones used
        # for the CNN training
        print("Use trained CNN model")
        infer(path_CNN_cutouts, CNN_model, 0.1)

        # Add the probability to the candidates table.
        infer_table = ascii.read(
            os.path.join(path_CNN_cutouts, "infer_results.dat"))
        sources = join(sources,
                       infer_table["cand_ID", "label0", "label1"],
                       join_type="left")

        # keep only transients that are above the threshold
        mask_CNN = sources["label1"] >= CNN_thres
        mask_tot = mask_tot & mask_CNN

    # Write output file.
    candidates = sources[mask_tot]

    if fmt == "fits":
        info_dicts_filtered = np.array(info_dicts)[mask_tot]

    # if no candidates remain, skip the following
    if len(candidates) == 0:
        print("No candidates left after filtering.")
        return 0

    # Create ID to start from 1.
    candidates["cand_ID"] = np.arange(len(candidates)) + 1
    # Rename columns
    if "label0" in candidates.colnames:
        candidates.rename_column("label0", "P_False")
        candidates.rename_column("label1", "P_True")
    candidates.write(
        os.path.join(path, fname2 + "_candidates.dat"),
        format="ascii.commented_header",
        overwrite=True,
    )

    # Update
    print("Make cutouts")
    # Extract small image centered on candidates passing the filters
    if makecutout:
        path_cutout = os.path.join(path, "cutouts")
        mkdir_p(path_cutout)
        args_data = []
        outnames = []
        titles = []
        if combined:
            args_combined = []
            path_cutout_combined = os.path.join(path_cutout, "combined")
            mkdir_p(path_cutout_combined)

        for cand in candidates:
            coords = [cand["_RAJ2000"], cand["_DEJ2000"]]
            outname = os.path.join(path_cutout,
                                   "candidate_%d.%s" % (cand["cand_ID"], fmt))
            if combined:
                outname_combined = os.path.join(
                    path_cutout_combined,
                    "candidate_%d_comb.%s" % (cand["cand_ID"], "png"),
                )

            header = fits.getheader(cand["OriginalIma"])
            try:
                date = Time(header["DATE-OBS"], format="fits")
                # convert to Julian date if needed
                # date_JD = date.jd
            except (KeyError, ValueError):
                date = Time(header["MJD-OBS"], format="mjd")
            date.format = "iso"
            if fmt != "fits" or combined:
                _coords = SkyCoord(
                    cand["_RAJ2000"],
                    cand["_DEJ2000"],
                    unit=(u.degree, u.degree),
                    frame="icrs",
                )
                coords_sexa = _coords.to_string(style="hmsdms")
                title = ("RA Dec: %s \n" % coords_sexa + "Time (UTC): %s \n" %
                         (date.value) + "Mag: %.2f +/- %.2f     " %
                         (cand["mag_calib"], cand["mag_calib_err"]) +
                         "     FWHM_ratio: %.2f" %
                         (cand["FWHM"] / cand["FWHMPSF"]))
                if CNN_model is not None:
                    title += "     CNN proba: %.2f " % cand["P_True"]

                titles.append(title)

            args_data.append([
                cand["filenames"],
                coords,
                "world",
                [size, size],
                -1,
            ])
            outnames.append(outname)

            if combined:
                args_combined.append([
                    [cand["OriginalIma"], cand["RefIma"], cand["filenames"]],
                    coords,
                    "world",
                    outname_combined,
                    [size, size],
                    -1,
                    title,
                    [fmt] * len(outname_combined),
                ])

        if fmt != "fits":
            info_dicts_filtered = [None] * len(outnames)
        else:
            titles = [None] * len(outnames)

        # Create sub-array
        args_data = np.array(args_data)

        make_sub_image(
            args_data[:, 0],
            outnames,
            args_data[:, 1],
            args_data[:, 2],
            args_data[:, 3],
            args_data[:, 4],
            info_dicts_filtered,
            titles,
            [fmt] * len(outnames),
            nb_threads,
        )

        if combined:
            print("Make combined cutouts")
            p = mp.Pool(nb_threads)
            p.starmap(combine_cutouts, args_combined)
            p.close()
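
A minimal usage sketch, assuming sources is the astropy Table produced by the detection step with the columns accessed above ('Match', 'edge', 'FWHM', 'FWHMPSF', ...); the CNN model path is hypothetical:

# With CNN_model=None the CNN step is skipped entirely.
filter_candidates(sources,
                  FWHM_ratio_lower=0.5,
                  FWHM_ratio_upper=5.0,
                  CNN_model="CNN_training/model.h5",
                  CNN_thres=0.5,
                  makecutout=True,
                  fmt="png")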
Example #5
def subimage(path, training, size=32, radius=1, flag_notsub=False, false=False):
    """ Extract a sub-image centered on the candidate position """

    path_gmadet = getpath()

    # size of the extracted image
    cutsize = (size, size)
    print("Combine the detections from all simulated images.")
    candidates_list = getCandPos(path, flag_notsub=flag_notsub)

    if training:
        print("Crossmatch simulated events with detections")
        sim_list = crossmatch_detections(path, candidates_list, radius=radius)

        resdir = os.path.join(path, "candidates_training")
        mkdir_p(resdir)
        truedir = os.path.join(path, "candidates_training", "true")
        mkdir_p(truedir)
        falsedir = os.path.join(path, "candidates_training", "false")
        mkdir_p(falsedir)
    else:
        resdir = os.path.join(path, "candidates")
        mkdir_p(resdir)

    for i, cand in enumerate(candidates_list):
        print(
            "processing candidate %d/%d ..." % (i + 1, len(candidates_list)),
            end="\r",
            flush=True,
        )

        OT_coords = [cand["RA"], cand["Dec"]]

        if training:
            # Check if corresponds to a simulated event
            mask = sim_list["closest_candID"] == cand["ID"]
            # Check whether it is a simulated object
            # mask1 = sim_list['filename2'] == cand['OriginalIma']
            # mask2 = (sim_list['RA'] - cand['RA'])**2 + \
            #        (sim_list['Dec'] - cand['Dec'])**2 < (radius/3600)**2
            # mask = np.bitwise_and(mask1,mask2)

            # Consider only sources with a single match.
            # Some real sources close to a cosmic ray or to another source
            # will not be considered, but a visual inspection can easily
            # classify them as true transients.
            if len(sim_list[mask]) == 1:
                outdir = truedir
            else:
                if false:
                    outdir = falsedir
                else:
                    outdir = resdir
            inputname = cand["filename"]
        else:
            outdir = resdir
            inputname = cand["filename"]

        outname = os.path.join(outdir, "candidate_%d.fits" % i)
        # If the input file cannot be found for some reason,
        # make sure the code does not crash
        try:
            # Get initial image size
            hdr_input = fits.getheader(inputname)
            Naxis1 = hdr_input["NAXIS1"]
            Naxis2 = hdr_input["NAXIS2"]

            # Extract small image.
            # Only one thread, as we provide one image after another
            make_sub_image(
                inputname,
                outname,
                OT_coords,
                coords_type="world",
                sizes=[size, size],
                FoVs=-1,
                fmts="fits",
                nb_threads=1,
            )

            # add information to header
            hdus = fits.open(outname, memmap=False)
            hdr = hdus[0].header
            hdr["MAG"] = cand["mag"]
            hdr["MAGERR"] = cand["magerr"]
            hdr["FWHM"] = cand["FWHM"]
            hdr["FWHMPSF"] = cand["FWHMPSF"]
            hdr["FILTER"] = cand["Band"]
            hdr["RA"] = cand["RA"]
            hdr["Dec"] = cand["Dec"]
            hdr["Xpos"] = cand["Xpos"]
            hdr["Ypos"] = cand["Ypos"]
            hdr["FILE"] = cand["filename"]
            hdr["CANDID"] = cand["ID"]
            # Whether it is close to the edge of the image
            # If yes the image will not be size x size in pixels
            """
            if (
                (cand["Xpos"] > Naxis1 - size)
                or (cand["Xpos"] < size)
                or (cand["Ypos"] > Naxis2 - size)
                or (cand["Ypos"] < size)
            ):
                hdr["edge"] = "True"
                print("Edge ", outname)
            else:
                hdr["edge"] = "False"
            """
            # If the source is too close to the edge, the cutout has
            # dimensions smaller than the required (size x size).
            # That would make the CNN crash, so flag them.
            if hdus[0].data.shape != (size, size):
                hdr["edge"] = "True"
            else:
                hdr["edge"] = "False"
            hdus.writeto(outname, overwrite=True)

        except Exception:
            print("Could not extract candidate in %s" % inputname)
Example #6
def sim(datapath, filenames, Ntrans=50, size=48,
        magrange=[14, 22], gain=None, magzp=30):
    """Insert point sources in real images """

    filenames = np.atleast_1d(filenames)

    #simdir = os.path.join(datapath, "simulation")
    simdir = datapath
    mkdir_p(simdir)

    cutsize = np.array([size, size], dtype=np.int32)

    hcutsize = cutsize // 2

    #  Lists to store the positions of simulated transients
    trans_pix = []
    trans_wcs = []
    filelist = []
    maglist = []
    filterlist = []
    cpsf1 = np.zeros((cutsize[1], cutsize[0]))

    counter = 0
    # filenames also contains the PSF and weight maps, which are
    # skipped below. Uncomment to limit the number of images in which
    # stars are simulated:
    # filenames = filenames[:4]
    for filename in filenames:
        if "psf" not in filename and "weight" not in filename:
            name = os.path.basename(filename)
            # print("\x1b[2K", end='\r', flush=True),
            # print("Loading " + epoch1 + " image data ...", end='\r',
            # flush=True),
            hdusi1 = fits.open(filename, memmap=False)
            headi1 = hdusi1[0].header
            band = str(headi1["FILTER"])
            ima1 = hdusi1[0].data.astype(np.float32)
            hdusp1 = fits.open(
                os.path.splitext(filename)[0] +
                "_psf.fits",
                memmap=False)
            headp1 = hdusp1[0].header
            step1 = headp1["PSF_SAMP"]
            nb_psf_snaps = int(headp1["PSF_NB"])
            psfs1 = hdusp1[0].data.astype(np.float32)
            imsize = ima1.shape
            posfac = np.array([nb_psf_snaps, nb_psf_snaps]) / imsize
            w = wcs.WCS(headi1)
            # try to use GAIN from header
            if gain is None:
                try:
                    gain = headp1["GAIN"]
                except KeyError:
                    print("GAIN keyword not found in header, set to 1.0.")
                    gain = 1.0

            # Add the transients to image
            pos = np.zeros((Ntrans, 2), dtype=float)
            for j in range(Ntrans):
                # newfile = os.path.join(simdir, os.path.splitext(name)[
                #                       0] + "_" + str(counter) + ".fits")
                # Keep the same name for now; if that works, remove the
                # lines above.
                newfile = filename
                filelist.append(os.path.abspath(newfile))

                filterlist.append(band)

                pos[j] = np.random.random_sample(
                    2) * (imsize - cutsize) + cutsize / 2.0
                # store positions
                ra, dec = w.wcs_pix2world(pos[j][1], pos[j][0], 1)
                trans_pix.append(pos[j])
                trans_wcs.append([ra, dec])

                # get pixels indexes in the image
                ipos = pos[j].astype(int)
                # extract subimage centered on position of the object and
                # store in cima1
                # same for weight maps
                iposrange = np.s_[
                    ipos[0] - hcutsize[0]: ipos[0] + hcutsize[0],
                    ipos[1] - hcutsize[1]: ipos[1] + hcutsize[1],
                ]

                # Select the PSF corresponding to the image area,
                # in case there are more than one PSF estimated per axis
                if nb_psf_snaps == 1:
                    psf1 = psfs1
                else:
                    #  get position with respect to number of PSF snapshots
                    ppos = (pos[j] * posfac).astype(int)
                    psf1 = psfs1[ppos[0], ppos[1]]
                # step1 is the PSF_SAMP parameter from PSFEx, used in mat1
                # to rescale the PSF to the image sampling.
                # psf1 comes from PSFEx with a different sampling than the
                # image; the new object is put at the center of cpsf1.
                mat1 = np.array(
                    [
                        [step1, 0., hcutsize[0] - psf1.shape[0] * step1 / 2.],
                        [0., step1, hcutsize[1] - psf1.shape[0] * step1 / 2.],
                    ]
                )
                # transformation of the PSF, resampling.
                cpsf1 = cv2.warpAffine(
                    psf1, mat1, cpsf1.shape, flags=cv2.INTER_LANCZOS4
                )

                # draw the object magnitude uniformly within the
                # predefined range
                mag = np.random.uniform(
                    low=magrange[0], high=magrange[1], size=(1,))
                # Convert the magnitude to ADU using the zeropoint magnitude.
                # Note that the zeropoint magnitude is defined as 30; the
                # exact value does not matter here, we simply need to draw
                # random magnitudes. The proper value could be estimated
                # for our telescopes.
                maglist.append(mag[0])
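                # 0.921034 ~= ln(10)/2.5, so amp1 = 10**(0.4 * (magzp - mag))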
                amp1 = np.exp(0.921034 * (magzp - mag))

                # Apply Poisson Noise to simulated object
                noisy_object1 = cpsf1 * amp1
                # np.random.poisson(cpsf1 * amp1)
                noisy_object1[noisy_object1 < 0] = 0
                noisy_object1 = np.random.poisson(noisy_object1)
                noisy_object1 = noisy_object1 / gain

                ima1[iposrange] += noisy_object1

            hdusi1[0].data = ima1
            #  Write new fits file
            hdusi1.writeto(newfile, overwrite=True)

            hdusi1.close()
            hdusp1.close()
            counter += 1
    xypos = np.array(trans_pix)
    wcspos = np.array(trans_wcs)
    idx = np.arange(len(xypos))
    table = Table(
        [
            idx,
            filelist,
            xypos[:, 1],
            xypos[:, 0],
            wcspos[:, 0],
            wcspos[:, 1],
            maglist,
            filterlist,
        ],
        names=[
            "idx",
            "filename",
            "Xpos",
            "Ypos",
            "RA",
            "Dec",
            "mag",
            "filter"],
    )
    table.write(
        os.path.join(simdir, "simulated_objects.list"),
        format="ascii.commented_header",
        overwrite=True,
    )
    return table
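
A minimal usage sketch for sim(), assuming each science image has a matching PSFEx output named <image>_psf.fits next to it (hypothetical filenames):

# Inject 50 fake point sources per image with magnitudes in [16, 21];
# returns the table that is also written to simulated_objects.list.
sim_table = sim("./test_images/sim",
                ["./test_images/sim/image1.fits"],
                Ntrans=50, size=48, magrange=[16, 21])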
Example #7
def substraction(filenames, reference, config, soft="hotpants",
                 method="individual", doMosaic=False,
                 verbose="NORMAL", outLevel=1, nb_threads=8):
    """Subtract a reference image from the input image"""

    imagelist = np.atleast_1d(filenames)
    for ima in imagelist:
        # Create a folder for the subtraction results
        path, filename = os.path.split(ima)
        if path:
            folder = path + "/"
        else:
            folder = ""

        resultDir = folder + "substraction/"
        mkdir_p(resultDir)

        # Get coordinates of input image
        im_coords = get_corner_coords(ima)

        # Define the reference image
        if reference == "ps1":
            _, band, _ = get_phot_cat(ima, None)
            if band == "B":
                band = "g"
            elif band == "V":
                band = "g"
            elif band == "R":
                band = "r"
            elif band == "I":
                band = "i"
            elif band == "g+r":
                band = "r"
            # band = 'g'
            ps1_cell_table = ps1_grid(im_coords)
            #  Get PS1 files with which to perform the subtraction
            subfiles = prepare_PS1_sub(
                ps1_cell_table, band, ima, config, verbose=verbose,
                method=method
            )
            regis_info = registration(
                subfiles, config, resultDir=resultDir, reference=reference,
                verbose=verbose
            )

            if soft == "hotpants":
                subFiles = hotpants(regis_info, config, verbose=verbose,
                        nb_threads=nb_threads)

        #  create a mosaic of all subtracted images when
        #  method == 'individual'
        #  Mosaic for subtracted files
        if method == "individual" and doMosaic:
            subfiles = np.array(subFiles)
            #  Mosaic for input file
            sublist = [i for i in subfiles[:, 0]]
            outName = os.path.splitext(filename)[0] + "_mosaic"
            create_mosaic(
                sublist, ima, resultDir, outName, config=config,
                verbose=verbose
            )
            #  Mosaic for ps1 reference files
            sublist = [i for i in subfiles[:, 1]]
            outName = os.path.splitext(filename)[0] + "_mosaic_ps1"
            create_mosaic(
                sublist, ima, resultDir, outName, config=config,
                verbose=verbose
            )
            #  Mosaic for subtracted files
            sublist = [i for i in subfiles[:, 2]]
            outName = os.path.splitext(filename)[0] + "_mosaic_sub"
            create_mosaic(
                sublist, ima, resultDir, outName, config=config,
                verbose=verbose
            )
            #  Mosaic for the mask applied to subtracted files
            #  (arguably not needed)
            sublist = [i for i in subfiles[:, 3]]
            outName = os.path.splitext(filename)[0] + "_mosaic_sub_mask"
            create_mosaic(
                sublist, ima, resultDir, outName, config=config,
                verbose=verbose
            )

    #  Delete files if necessary, mainly to save disk space.
    #  Caveat: deleted files are still referenced in the output files, so
    #  the user cannot inspect some that might be important.
    if outLevel == 0:
        # NOTE: none of these names is defined in this scope (likely
        # leftovers from an earlier version); calling rm_p on them would
        # raise a NameError, so they are left commented out.
        # rm_p(ima)
        # rm_p(refim)
        # rm_p(refim_mask)
        # rm_p(ima_regist)
        # rm_p(refim_regist)
        # rm_p(refim_regist_mask)
        pass

    return subFiles
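
A minimal usage sketch, assuming Pan-STARRS reference images are available for the field and hotpants is installed; the filenames and config object are hypothetical:

# Returns the list of subtracted files written under <path>/substraction/.
sub_files = substraction(["./test_images/image1.fits"],
                         reference="ps1",
                         config=config,
                         soft="hotpants",
                         method="individual",
                         nb_threads=8)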
Example #8
def train(path_cube, path_model, modelname, epochs,
          frac=0.1, dropout=0.3):
    """Train CNN with simulated data"""

    gpus = -1
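    # Hard-coded: set gpus > 0 to train with multi_gpu_model instead.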
    path_model = os.path.join(path_model, 'CNN_training/')
    mkdir_p(path_model)

    # Fraction of data used for the validation test
    fract = frac
    # define the dropout fraction for each dropout layer
    dprob = np.array([dropout, dropout, dropout])
    # define padding
    padding = "same"  # valid, same
    # outputname for the trained model
    model_name = os.path.join(path_model, "%s.h5" % modelname)

    print("Loading " + path_cube + " ...", end="\r", flush=True)
    data = np.load(path_cube)
    ima = data["cube"]
    lab = keras.utils.to_categorical(data["labels"])
    mag = data["mags"]
    errmag = data["errmags"]
    band = data["filters"]
    cand_ids = data["candids"]
    nclass = lab.shape[1]
    n = ima.shape[0]
    nt = int(n * fract)

    print("Shuffling data ...", end="\r", flush=True)
    randomize = np.arange(n)
    np.random.shuffle(randomize)
    ima = ima[randomize]
    lab = lab[randomize]
    mag = mag[randomize]
    errmag = errmag[randomize]
    band = band[randomize]
    cand_ids = cand_ids[randomize]

    print("Splitting dataset ...", end="\r", flush=True)
    imal = ima[nt:]
    labl = lab[nt:]
    magl = mag[nt:]
    errmagl = errmag[nt:]
    bandl = band[nt:]
    cand_idsl = cand_ids[nt:]

    imat = ima[:nt]
    labt = lab[:nt]
    magt = mag[:nt]
    errmagt = errmag[:nt]
    bandt = band[:nt]
    cand_idst = cand_ids[:nt]

    model = keras.models.Sequential()

    model.add(
        keras.layers.Conv2D(64, (3, 3), activation="elu",
                            padding=padding,
                            input_shape=ima.shape[1:])
    )
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.AveragePooling2D(pool_size=(2, 2)))
    # model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[0]))
    model.add(
        keras.layers.Conv2D(
            128, (3, 3), activation="elu", padding=padding))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[1]))
    model.add(
        keras.layers.Conv2D(
            256, (3, 3), activation="elu", padding=padding))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[1]))
    model.add(
        keras.layers.Conv2D(
            256, (3, 3), activation="elu", padding=padding))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[2]))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(512, activation="elu"))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dropout(dprob[2]))
    model.add(keras.layers.Dense(32, activation="elu"))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dropout(dprob[2]))
    model.add(keras.layers.Dense(nclass, activation="softmax"))

    model.summary()

    if gpus > 0:
        parallel_model = multi_gpu_model(model, gpus=gpus)

        parallel_model.compile(
            loss="categorical_crossentropy",
            optimizer=keras.optimizers.Adam(lr=0.001),
            # optimizer=keras.optimizers.Nadam(),
            metrics=["accuracy"],
        )

        parallel_model.fit(
            imal,
            labl,
            batch_size=1024,
            epochs=epochs,
            verbose=1,
            validation_data=(imat, labt),
        )

        score = parallel_model.evaluate(imat, labt, verbose=0)
        # save does not work on multi_gpu_model
        # parallel_model.save(model_name)
        labp = parallel_model.predict(imat)

    else:

        model.compile(
            loss="categorical_crossentropy",
            optimizer=keras.optimizers.Adam(lr=0.001),
            # optimizer=keras.optimizers.Nadam(),
            metrics=["accuracy"],
        )
        # log = keras.callbacks.ModelCheckpoint(
        #       'callbacks.h5', monitor='val_loss', verbose=0,
        #       save_best_only=True, save_weights_only=False,
        #       mode='auto', period=1)
        # log = keras.callbacks(TensorBoard(
        #        log_dir='./logs', histogram_freq=5, batch_size=1024,
        #        write_graph=True, write_grads=False, write_images=False,
        #        embeddings_freq=0, embeddings_layer_names=None,
        #        embeddings_metadata=None, embeddings_data=None,
        #        update_freq='epoch'))

        model.fit(
            imal,
            labl,
            batch_size=1024,
            epochs=epochs,
            verbose=1,
            validation_data=(imat, labt),
        )
        score = model.evaluate(imat, labt, verbose=0)
        labp = model.predict(imat)

    model.save(model_name)

    trange = np.arange(0.5, 1.0, 0.0001)
    fig, ax = plt.subplots()
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    mag_min = np.min(magt[magt != 99])
    mag_max = np.max(magt[magt != 99])
    for maglim in np.linspace(mag_min, mag_max, 6):
        labpm = labp[magt < maglim]
        labtm = labt[magt < maglim]
        labpf = labpm[labtm[:, 1] <= 0.5]
        labpt = labpm[labtm[:, 1] > 0.5]
        tpr = [np.mean(labpt[:, 1] > t) for t in trange]
        fpr = [np.mean(labpf[:, 1] > t) for t in trange]
        plt.plot(fpr, tpr, label="mag < %.2f" % maglim)
    legend = ax.legend(loc="lower right")
    plt.savefig(os.path.join(path_model, modelname + "_ROC_mag.png"))
    # plt.show()

    # ROC with dmag
    fig, ax = plt.subplots()
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    errmag_min = np.min(abs(errmagt[errmagt != 0]))
    errmag_max = np.max(abs(errmagt[errmagt != 0]))
    for errmaglim in np.linspace(errmag_min, errmag_max, 6):
        labpm = labp[errmagt < errmaglim]
        labtm = labt[errmagt < errmaglim]
        labpf = labpm[labtm[:, 1] <= 0.5]
        labpt = labpm[labtm[:, 1] > 0.5]
        tpr = [np.mean(labpt[:, 1] > t) for t in trange]
        fpr = [np.mean(labpf[:, 1] > t) for t in trange]
        plt.plot(fpr, tpr, label="errmag < %.2f" % errmaglim)
    legend = ax.legend(loc="lower right")
    plt.savefig(os.path.join(path_model, modelname + "_ROC_errmag.png"))
    # plt.show()

    fig, ax = plt.subplots()
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    for band in ["g", "r", "i", "z"]:
        labpm = labp[bandt == band]
        labtm = labt[bandt == band]
        labpf = labpm[labtm[:, 1] <= 0.5]
        labpt = labpm[labtm[:, 1] > 0.5]
        tpr = [np.mean(labpt[:, 1] > t) for t in trange]
        fpr = [np.mean(labpf[:, 1] > t) for t in trange]
        plt.plot(fpr, tpr, label="%s" % band)
    legend = ax.legend(loc="lower right")
    plt.savefig(os.path.join(path_model, modelname + "_ROC_band.png"))