import glob
import os
from math import floor
import multiprocessing as mp

import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
from astropy import units as u
from astropy import wcs
from astropy.coordinates import SkyCoord
from astropy.io import ascii, fits
from astropy.table import Table, join
from astropy.time import Time

# The helper functions used below (mkdir_p, getCandPos,
# crossmatch_detections, make_sub_image, make_fits, make_figure,
# combine_cutouts, infer, getpath, get_corner_coords, get_phot_cat,
# ps1_grid, prepare_PS1_sub, registration, hotpants, create_mosaic, rm_p,
# multi_gpu_model) are assumed to be importable from the surrounding
# gmadet package.


def convert(path_datacube, cubename, path_cutouts, frac_true):
    """Convert simulated data before starting training."""
    outdir = os.path.join(path_datacube, "datacube")
    mkdir_p(outdir)

    # Get all the cutouts for the true and false candidates
    truelist = glob.glob(os.path.join(path_cutouts, "true", "*.fits"))
    falselist = glob.glob(os.path.join(path_cutouts, "false", "*.fits"))

    # Output cube name
    npz_name = "%s.npz" % cubename

    # Balance the dataset on the smaller class, keeping a fraction
    # frac_true of true candidates in the final cube.
    Ncand_true = len(truelist)
    Ncand_false = len(falselist)
    if Ncand_true > Ncand_false:
        Ncand_true_max = floor(2 * Ncand_false * frac_true)
        Ncand_false_max = floor(2 * Ncand_false * (1 - frac_true))
    else:
        Ncand_true_max = floor(2 * Ncand_true * frac_true)
        Ncand_false_max = floor(2 * Ncand_true * (1 - frac_true))
    Ncand = Ncand_true_max + Ncand_false_max

    cube = []
    labels = []
    mags = []
    errmags = []
    cand_ids = []
    filters = []

    counter_true = 0
    for cand in truelist:
        if counter_true >= Ncand_true_max:
            break
        hdus = fits.open(cand, memmap=False)
        head = hdus[0].header
        # Exclude cases too close to the edge, i.e. located at less
        # than the defined size of the small images.
        if head["EDGE"] == "False":
            labels += [1]
            mags += [head["MAG"]]
            errmags += [head["MAGERR"]]
            filters += [head["FILTER"]]
            cand_ids += [head["CANDID"]]
            cube.append(hdus[0].data)
        hdus.close()
        counter_true += 1

    counter_false = 0
    for cand in falselist:
        if counter_false >= Ncand_false_max:
            break
        hdus = fits.open(cand, memmap=False)
        head = hdus[0].header
        # Exclude cases too close to the edge, i.e. located at less
        # than the defined size of the small images.
        if head["EDGE"] == "False":
            labels += [0]
            mags += [head["MAG"]]
            errmags += [head["MAGERR"]]
            filters += [head["FILTER"]]
            cand_ids += [head["CANDID"]]
            cube.append(hdus[0].data)
        hdus.close()
        counter_false += 1

    print("The datacube contains %d candidates with Ntrue = %d "
          "and Nfalse = %d" % (Ncand, counter_true, counter_false))
    print("Converting and reshaping arrays ...")

    # Convert lists to B.I.P. NumPy arrays.
    # Candidates near an image edge do not have the expected shape;
    # they were flagged with the EDGE keyword and skipped above.
    cube = np.asarray(cube, dtype=np.float32)
    if cube.ndim < 4:
        cube = np.reshape(
            cube, [cube.shape[0], cube.shape[1], cube.shape[2], 1])
    else:
        cube = np.moveaxis(cube, 1, -1)

    # Report dimensions of the data cube
    print("Saving %d %d×%d×%d image datacube ..." % cube.shape,
          end="\r", flush=True)
    np.savez(
        os.path.join(outdir, npz_name),
        cube=cube,
        labels=labels,
        mags=mags,
        errmags=errmags,
        filters=filters,
        candids=cand_ids,
    )
    print("Saved to " + os.path.join(outdir, npz_name))
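# A minimal usage sketch for `convert` (hedged: the paths below are
# hypothetical). It assumes `subimage` already produced FITS cutouts under
# <path_cutouts>/true and <path_cutouts>/false.
#
#   convert(path_datacube="./simulations",
#           cubename="cube_32x32",
#           path_cutouts="./simulations/candidates_training",
#           frac_true=0.5)
#
# With frac_true=0.5 the datacube is balanced: at most
# 2 * min(Ntrue, Nfalse) * 0.5 candidates are kept per class.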
def makestats(path, radius=2):
    """Create some statistics on the simulated events."""
    mkdir_p(os.path.join(path, "CheckSim"))

    candidates_list = getCandPos(path)

    # Load the file crossmatching the detected candidates with the
    # simulated events if it already exists, otherwise create it.
    try:
        crossmatch = ascii.read(os.path.join(path, "crossmatch.dat"))
    except BaseException:
        crossmatch = crossmatch_detections(path, candidates_list,
                                           radius=radius)

    # mask_det = crossmatch["Nmatches"] == 1
    # Actually take everything with a match.
    mask_det = crossmatch["Nmatches"] > 0

    bands = crossmatch.group_by("filter").groups.keys

    # Plot the histogram of simulated-source magnitudes, all bands
    plt.figure()
    n1, bins1, patches = plt.hist(
        crossmatch["mag"], 30,
        facecolor="C0", alpha=0.75,
        density=False, stacked=False, label="Sim",
    )
    n2, bins2, patches = plt.hist(
        crossmatch["mag"][mask_det], bins1,
        facecolor="C1", alpha=0.5,
        density=False, stacked=False, label="Det",
    )
    plt.xlabel("mag")
    plt.ylabel("N")
    plt.title("All filters")
    plt.grid(True)
    plt.legend()
    plt.savefig(os.path.join(path, "CheckSim/sim_mag_distrib_allbands.png"))

    # Plot the fraction of detections per simulated-magnitude bin
    plt.figure()
    x = (bins1[1:] + bins1[:-1]) / 2
    plt.plot(x, n2 / n1)
    plt.xlabel("Simulated magnitude")
    plt.ylabel("Detected fraction")
    plt.title("Fraction of simulated events detected")
    plt.savefig(os.path.join(path, "CheckSim/detection_fraction_allbands.png"))

    # Same histograms, per band
    for band in bands:
        mask_band = crossmatch["filter"] == band[0]
        mask = np.bitwise_and(mask_det, mask_band)
        plt.figure()
        n1, bins1, patches = plt.hist(
            crossmatch["mag"][mask_band], 30,
            facecolor="C0", alpha=0.75,
            density=False, stacked=False, label="Sim",
        )
        n2, bins2, patches = plt.hist(
            crossmatch["mag"][mask], bins1,
            facecolor="C1", alpha=0.5,
            density=False, stacked=False, label="Det",
        )
        plt.xlabel("mag")
        plt.ylabel("N")
        plt.title("%s band" % band[0])
        plt.grid(True)
        plt.legend()
        plt.savefig(
            os.path.join(path, "CheckSim/sim_mag_distrib_%s.png" % band[0]))
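# Usage sketch for `makestats` (hedged: the path is hypothetical). It reads
# or builds <path>/crossmatch.dat and writes the diagnostic plots to
# <path>/CheckSim/.
#
#   makestats("./simulations", radius=2)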
def filter_candidates(sources,
                      FWHM_ratio_lower=0.5,
                      FWHM_ratio_upper=5.0,
                      CNN_model=None,
                      CNN_thres=0.0,
                      makecutout=True,
                      size=100,
                      size_cnn=32,
                      fmt='png',
                      outLevel=1,
                      nb_threads=8,
                      combined=False):
    """Filter transient candidates"""
    print('Filter candidates')

    # Take the first candidate to extract the path where to store the
    # files. No need to check whether subtraction was performed: if it
    # was, only sources from subtracted files have 'Match' == 'Y'.
    path, fname_ext = os.path.split(sources['filenames'][0])
    # Get rid of the extension to keep only the name
    fname2, extension = os.path.splitext(fname_ext)
    # Get rid of the '_ref' pattern
    fname2 = fname2.split('_ref')[0]

    # First get the sources not crossmatching with sources in catalogs
    mask_cat = sources['Match'] == 'N'
    # Remove candidates on the edges
    mask_edge = sources["edge"] == 'N'
    # Remove sources with a FWHM ratio outside the desired range
    FWHM_ratio = sources["FWHM"] / sources["FWHMPSF"]
    mask_FWHM = (FWHM_ratio >= FWHM_ratio_lower) & \
        (FWHM_ratio <= FWHM_ratio_upper)

    mask_tot = mask_cat & mask_edge & mask_FWHM

    # Use a trained CNN model to filter candidates.
    if CNN_model is not None:
        print('Create fits cutouts for CNN')
        # Create fits cutouts to be given to the CNN model
        path_CNN_cutouts = os.path.join(path, 'CNN_cutouts')
        mkdir_p(path_CNN_cutouts)
        args_data = []
        outnames = []
        info_dicts = []
        for cand in sources:
            coords = [cand['_RAJ2000'], cand['_DEJ2000']]
            outname = os.path.join(path_CNN_cutouts,
                                   'candidate_%d.fits' % (cand['idx']))
            info_dict = {}
            info_dict['RA'] = cand['_RAJ2000']
            info_dict['DEC'] = cand['_DEJ2000']
            info_dict['XPOS'] = cand['Xpos']
            info_dict['YPOS'] = cand['Ypos']
            info_dict['FILE'] = cand['filenames']
            info_dict['CANDID'] = cand['idx']
            info_dict['MAG'] = cand['mag_calib']
            info_dict['MAGERR'] = cand['mag_calib_err']
            info_dict['FWHM'] = cand['FWHM']
            info_dict['FWHMPSF'] = cand['FWHMPSF']

            args_data.append([
                cand['filenames'],
                coords,
                "world",
                [size_cnn, size_cnn],
                -1,
            ])
            outnames.append(outname)
            info_dicts.append(info_dict)

        """
        # Run make_sub_image asynchronously in parallel using many
        # processes. The args list needs to be cut into the number of
        # required threads. There may be another way?
        N_sources = len(sources)
        # Check if there is less data than the number of threads.
        if N_sources < nb_threads:
            nb_threads = 1
        Ncut = int(N_sources / nb_threads)
        args_threads = []
        idx_stop = []
        for i in range(nb_threads):
            if i == 0:
                args_threads.append(args[i * Ncut: (i + 1) * Ncut])
                idx_stop.append((i + 1) * Ncut)
            elif 0 < i < nb_threads - 1:
                args_threads.append(args[i * Ncut: (i + 1) * Ncut])
                idx_stop.append((i + 1) * Ncut)
            elif i == nb_threads - 1:
                args_threads.append(args[i * Ncut: N_sources])
                idx_stop.append(N_sources)
            if idx_stop[-1] >= N_sources:
                break
        args_threads = np.array(args_threads)
        """

        args_data = np.array(args_data)
        pool = mp.Pool(nb_threads)
        # call apply_async() without callback
        """
        result_objects = [pool.apply_async(make_sub_image,
                                           args=(j[0, :], j[1, :], j[2, :],
                                                 j[3, :], j[4, :]))
                          for j in args_threads]
        """
        result_objects = [
            pool.apply_async(make_sub_image,
                             args=(args_data[:, 0], args_data[:, 1],
                                   args_data[:, 2], args_data[:, 3],
                                   args_data[:, 4]))
        ]

        # result_objects is a list of pool.ApplyResult objects
        results = [r.get() for r in result_objects]

        # Don't forget to close the pool
        pool.close()
        pool.join()
        results = np.array(results[0])

        # Create fits cutouts
        p = mp.Pool(nb_threads)
        args = [[a, b, c, d, e, f]
                for a, b, c, d, e, f in zip(results[0, :], outnames,
                                            results[1, :], results[2, :],
                                            results[3, :], info_dicts)]
        p.starmap(make_fits, args)
        p.close()

        # Run the CNN model to associate a probability to each cutout.
        # The size of the cutouts must be the same as the one used
        # for the CNN training.
        print("Use trained CNN model")
        infer(path_CNN_cutouts, CNN_model, 0.1)

        # Add the probability to the candidates table.
        infer_table = ascii.read(
            os.path.join(path_CNN_cutouts, 'infer_results.dat'))
        sources = join(sources,
                       infer_table['idx', 'label0', 'label1'],
                       join_type='left')
        # Keep only transients that are above the threshold
        mask_CNN = sources['label1'] >= CNN_thres
        mask_tot = mask_tot & mask_CNN

    # Write the output file.
    candidates = sources[mask_tot]
    # Create IDs starting from 1.
    candidates['cand_ID'] = np.arange(len(candidates)) + 1
    # Rename columns
    if 'label0' in candidates.colnames:
        candidates.rename_column('label0', 'P_False')
        candidates.rename_column('label1', 'P_True')

    candidates.write(os.path.join(path, fname2 + '_candidates.dat'),
                     format='ascii.commented_header', overwrite=True)

    print('Make cutouts')
    # Extract small images centered on the candidates passing the filters
    if makecutout:
        path_cutout = os.path.join(path, 'cutouts')
        mkdir_p(path_cutout)
        args_data = []
        outnames = []
        titles = []
        if combined:
            args_combined = []
            path_cutout_combined = os.path.join(path_cutout, 'combined')
            mkdir_p(path_cutout_combined)
        for cand in candidates:
            coords = [cand['_RAJ2000'], cand['_DEJ2000']]
            outname = os.path.join(path_cutout,
                                   'candidate_%d.%s' % (cand['cand_ID'],
                                                        fmt))
            if combined:
                outname_combined = os.path.join(
                    path_cutout_combined,
                    'candidate_%d_comb.%s' % (cand['cand_ID'], 'png'))
            header = fits.getheader(cand['OriginalIma'])
            try:
                date = Time(header["DATE-OBS"], format="fits")
                # convert in GPS time
                # date_JD = date.jd
            except BaseException:
                date = Time(header["MJD-OBS"], format="mjd")
            date.format = 'iso'

            if fmt != 'fits' or combined:
                _coords = SkyCoord(cand['_RAJ2000'], cand['_DEJ2000'],
                                   unit=(u.degree, u.degree), frame='icrs')
                coords_sexa = _coords.to_string(style='hmsdms')
                title = "RA Dec: %s \n" % coords_sexa + \
                        "Time (UTC): %s \n" % (date.value) + \
                        "Mag: %.2f +/- %.2f " % (cand['mag_calib'],
                                                 cand['mag_calib_err']) + \
                        " FWHM_ratio: %.2f" % (cand['FWHM']
                                               / cand['FWHMPSF'])
                if CNN_model is not None:
                    title += " CNN proba: %.2f " % cand['P_True']
                titles.append(title)

            # Display cutouts use the display size, not the CNN size
            # (as in the later version of this function).
            args_data.append([
                cand['filenames'],
                coords,
                "world",
                [size, size],
                -1,
            ])
            outnames.append(outname)
            if combined:
                args_combined.append(
                    [[cand['OriginalIma'], cand['RefIma'],
                      cand['filenames']],
                     coords, "world", outname_combined,
                     [size, size], -1, title])

        # Create sub-arrays
        args_data = np.array(args_data)
        pool = mp.Pool(nb_threads)
        # call apply_async() without callback
        result_objects = [
            pool.apply_async(make_sub_image,
                             args=(args_data[:, 0], args_data[:, 1],
                                   args_data[:, 2], args_data[:, 3],
                                   args_data[:, 4]))
        ]
        # result_objects is a list of pool.ApplyResult objects
        results = [r.get() for r in result_objects]
        # Don't forget to close the pool
        pool.close()
        pool.join()
        results = np.array(results[0])

        # Create the cutouts
        p = mp.Pool(nb_threads)
        if fmt != 'fits':
            # Pass the per-candidate titles list (the original passed the
            # single last title string here).
            args = [[a, b, c, d, e]
                    for a, b, c, d, e in zip(results[0, :], outnames,
                                             results[4, :],
                                             [fmt] * len(candidates),
                                             titles)]
            p.starmap(make_figure, args)
        elif fmt == 'fits':
            args = [[a, b, c, d, e, f]
                    for a, b, c, d, e, f in zip(results[0, :], outnames,
                                                results[1, :],
                                                results[2, :],
                                                results[3, :], info_dicts)]
            p.starmap(make_fits, args)
        p.close()

        if combined:
            print('Make combined cutouts')
            p = mp.Pool(nb_threads)
            p.starmap(combine_cutouts, args_combined)
            p.close()
def filter_candidates(
        sources,
        FWHM_ratio_lower=0.5,
        FWHM_ratio_upper=5.0,
        CNN_model=None,
        CNN_thres=0.0,
        makecutout=True,
        size=32,
        size_cnn=32,
        fmt="png",
        outLevel=1,
        nb_threads=8,
        combined=False,
):
    """Filter transient candidates"""
    # If there are no sources, skip the whole function
    if len(sources) == 0:
        print("No candidates, no need to filter.")
        return 0

    print("Filter candidates")
    # Take the first candidate to extract the path where to store the
    # files. No need to check whether subtraction was performed: if it
    # was, only sources from subtracted files have 'Match' == 'Y'.
    path, fname_ext = os.path.split(sources["filenames"][0])
    # Get rid of the extension to keep only the name
    fname2, extension = os.path.splitext(fname_ext)
    # Get rid of the '_ref' pattern
    fname2 = fname2.split("_ref")[0]

    # First get the sources not crossmatching with sources in catalogs
    mask_cat = sources["Match"] == "N"
    # Remove candidates on the edges
    mask_edge = sources["edge"] == "N"
    # Remove sources with a FWHM ratio outside the desired range
    FWHM_ratio = sources["FWHM"] / sources["FWHMPSF"]
    mask_FWHM = (FWHM_ratio >= FWHM_ratio_lower) & \
        (FWHM_ratio <= FWHM_ratio_upper)

    mask_tot = mask_cat & mask_edge & mask_FWHM

    # Create dictionaries with FITS info for the cutouts, either to be
    # given to the CNN model or simply for making FITS cutouts.
    if CNN_model is not None or fmt == "fits":
        if CNN_model is not None:
            path_CNN_cutouts = os.path.join(path, "CNN_cutouts")
            mkdir_p(path_CNN_cutouts)
            outnames = []
        args_data = []
        info_dicts = []
        for cand in sources:
            coords = [cand["_RAJ2000"], cand["_DEJ2000"]]
            if CNN_model is not None:
                outname = os.path.join(
                    path_CNN_cutouts, "candidate_%d.fits" % (cand["idx"]))
                outnames.append(outname)
            info_dict = {}
            info_dict["RA"] = cand["_RAJ2000"]
            info_dict["DEC"] = cand["_DEJ2000"]
            info_dict["XPOS"] = cand["Xpos"]
            info_dict["YPOS"] = cand["Ypos"]
            info_dict["FILE"] = cand["filenames"]
            info_dict["CANDID"] = cand["idx"]
            info_dict["MAG"] = cand["mag_calib"]
            info_dict["MAGERR"] = cand["mag_calib_err"]
            info_dict["FWHM"] = cand["FWHM"]
            info_dict["FWHMPSF"] = cand["FWHMPSF"]

            args_data.append([
                cand["filenames"],
                coords,
                "world",
                [size_cnn, size_cnn],
                -1,
            ])
            info_dicts.append(info_dict)

    # Use a trained CNN model to filter candidates.
    if CNN_model is not None:
        print("Create fits cutouts for CNN")
        # Create the sub-images. CNN cutouts are always written as FITS
        # files (the original passed [fmt] here, which looked unintended).
        args_data = np.array(args_data)
        make_sub_image(
            args_data[:, 0],
            outnames,
            args_data[:, 1],
            args_data[:, 2],
            args_data[:, 3],
            args_data[:, 4],
            info_dicts,
            [None] * len(outnames),
            ["fits"] * len(outnames),
            nb_threads,
        )
        # The size of the cutouts must be the same as the one used
        # for the CNN training.
        print("Use trained CNN model")
        infer(path_CNN_cutouts, CNN_model, 0.1)

        # Add the probability to the candidates table.
        infer_table = ascii.read(
            os.path.join(path_CNN_cutouts, "infer_results.dat"))
        sources = join(sources,
                       infer_table["cand_ID", "label0", "label1"],
                       join_type="left")
        # Keep only transients that are above the threshold
        mask_CNN = sources["label1"] >= CNN_thres
        mask_tot = mask_tot & mask_CNN

    # Write the output file.
    candidates = sources[mask_tot]
    if fmt == "fits":
        info_dicts_filtered = np.array(info_dicts)[mask_tot]

    # If no candidate survived the filters, skip the rest
    if len(candidates) == 0:
        print("No candidates left after filtering.")
        return 0

    # Create IDs starting from 1.
    candidates["cand_ID"] = np.arange(len(candidates)) + 1
    # Rename columns
    if "label0" in candidates.colnames:
        candidates.rename_column("label0", "P_False")
        candidates.rename_column("label1", "P_True")

    candidates.write(
        os.path.join(path, fname2 + "_candidates.dat"),
        format="ascii.commented_header",
        overwrite=True,
    )

    print("Make cutouts")
    # Extract small images centered on the candidates passing the filters
    if makecutout:
        path_cutout = os.path.join(path, "cutouts")
        mkdir_p(path_cutout)
        args_data = []
        outnames = []
        titles = []
        if combined:
            args_combined = []
            path_cutout_combined = os.path.join(path_cutout, "combined")
            mkdir_p(path_cutout_combined)
        for cand in candidates:
            coords = [cand["_RAJ2000"], cand["_DEJ2000"]]
            outname = os.path.join(
                path_cutout, "candidate_%d.%s" % (cand["cand_ID"], fmt))
            if combined:
                outname_combined = os.path.join(
                    path_cutout_combined,
                    "candidate_%d_comb.%s" % (cand["cand_ID"], "png"),
                )
            header = fits.getheader(cand["OriginalIma"])
            try:
                date = Time(header["DATE-OBS"], format="fits")
                # convert in GPS time
                # date_JD = date.jd
            except BaseException:
                date = Time(header["MJD-OBS"], format="mjd")
            date.format = "iso"

            if fmt != "fits" or combined:
                _coords = SkyCoord(
                    cand["_RAJ2000"],
                    cand["_DEJ2000"],
                    unit=(u.degree, u.degree),
                    frame="icrs",
                )
                coords_sexa = _coords.to_string(style="hmsdms")
                title = ("RA Dec: %s \n" % coords_sexa
                         + "Time (UTC): %s \n" % (date.value)
                         + "Mag: %.2f +/- %.2f " % (cand["mag_calib"],
                                                    cand["mag_calib_err"])
                         + " FWHM_ratio: %.2f" % (cand["FWHM"]
                                                  / cand["FWHMPSF"]))
                if CNN_model is not None:
                    title += " CNN proba: %.2f " % cand["P_True"]
                titles.append(title)

            args_data.append([
                cand["filenames"],
                coords,
                "world",
                [size, size],
                -1,
            ])
            outnames.append(outname)
            if combined:
                # Combined cutouts are always written as PNG. The original
                # passed [fmt] * len(outname_combined) here, which
                # multiplied fmt by the length of a path string.
                args_combined.append([
                    [cand["OriginalIma"], cand["RefIma"],
                     cand["filenames"]],
                    coords,
                    "world",
                    outname_combined,
                    [size, size],
                    -1,
                    title,
                    "png",
                ])

        if fmt != "fits":
            info_dicts_filtered = [None] * len(outnames)
        else:
            titles = [None] * len(outnames)

        # Create the sub-images
        args_data = np.array(args_data)
        make_sub_image(
            args_data[:, 0],
            outnames,
            args_data[:, 1],
            args_data[:, 2],
            args_data[:, 3],
            args_data[:, 4],
            info_dicts_filtered,
            titles,
            [fmt] * len(outnames),
            nb_threads,
        )

        if combined:
            print("Make combined cutouts")
            p = mp.Pool(nb_threads)
            p.starmap(combine_cutouts, args_combined)
            p.close()
def subimage(path, training, size=32, radius=1,
             flag_notsub=False, false=False):
    """Extract a sub-image centered on the candidate position."""
    path_gmadet = getpath()

    # Size of the extracted image
    cutsize = (size, size)

    print("Combine the detections from all simulated images.")
    candidates_list = getCandPos(path, flag_notsub=flag_notsub)

    if training:
        print("Crossmatch simulated events with detections")
        sim_list = crossmatch_detections(path, candidates_list,
                                         radius=radius)
        resdir = os.path.join(path, "candidates_training")
        mkdir_p(resdir)
        truedir = os.path.join(path, "candidates_training", "true")
        mkdir_p(truedir)
        falsedir = os.path.join(path, "candidates_training", "false")
        mkdir_p(falsedir)
    else:
        resdir = os.path.join(path, "candidates")
        mkdir_p(resdir)

    for i, cand in enumerate(candidates_list):
        print("processing candidates %d/%d ..." % (i, len(candidates_list)),
              end="\r", flush=True)

        OT_coords = [cand["RA"], cand["Dec"]]

        if training:
            # Check whether the candidate corresponds to a simulated event
            mask = sim_list["closest_candID"] == cand["ID"]
            # mask1 = sim_list['filename2'] == cand['OriginalIma']
            # mask2 = (sim_list['RA'] - cand['RA'])**2 + \
            #     (sim_list['Dec'] - cand['Dec'])**2 < (radius / 3600)**2
            # mask = np.bitwise_and(mask1, mask2)
            # Consider only sources with a single match. Some real sources
            # close to a cosmic or another artefact will not be considered,
            # but with a visual inspection they can easily be classified
            # as true transients.
            if len(sim_list[mask]) == 1:
                outdir = truedir
            elif false:
                outdir = falsedir
            else:
                outdir = resdir
            inputname = cand["filename"]
        else:
            outdir = resdir
            inputname = cand["filename"]

        outname = os.path.join(outdir, "candidate_%d.fits" % i)
        # If the inputname cannot be found for some reason,
        # make sure the code does not crash.
        try:
            # Get the initial image size
            hdr_input = fits.getheader(inputname)
            Naxis1 = hdr_input["NAXIS1"]
            Naxis2 = hdr_input["NAXIS2"]

            # Extract the small image. Only one thread, as we provide
            # one image after another.
            make_sub_image(
                inputname,
                outname,
                OT_coords,
                coords_type="world",
                sizes=[size, size],
                FoVs=-1,
                fmts="fits",
                nb_threads=1,
            )
            # Add information to the header
            hdus = fits.open(outname, memmap=False)
            hdr = hdus[0].header
            hdr["MAG"] = cand["mag"]
            hdr["MAGERR"] = cand["magerr"]
            hdr["FWHM"] = cand["FWHM"]
            hdr["FWHMPSF"] = cand["FWHMPSF"]
            hdr["FILTER"] = cand["Band"]
            hdr["RA"] = cand["RA"]
            hdr["Dec"] = cand["Dec"]
            hdr["Xpos"] = cand["Xpos"]
            hdr["Ypos"] = cand["Ypos"]
            hdr["FILE"] = cand["filename"]
            hdr["CANDID"] = cand["ID"]
            # An earlier version flagged edge candidates by comparing
            # Xpos / Ypos to the image dimensions (Naxis1, Naxis2):
            """
            if ((cand["Xpos"] > Naxis1 - size)
                    or (cand["Xpos"] < size)
                    or (cand["Ypos"] > Naxis2 - size)
                    or (cand["Ypos"] < size)):
                hdr["edge"] = "True"
                print("Edge ", outname)
            else:
                hdr["edge"] = "False"
            """
            # If the source is too close to the edge, the cutout has
            # dimensions smaller than the required (size x size). That
            # would make the CNN crash, so flag these candidates.
            if hdus[0].data.shape != (size, size):
                hdr["edge"] = "True"
            else:
                hdr["edge"] = "False"
            hdus.writeto(outname, overwrite=True)
        except BaseException:
            print("Could not extract candidate in %s" % inputname)
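# Usage sketch for `subimage` (hedged: the path is hypothetical). With
# training=True, cutouts crossmatching a simulated event go to
# candidates_training/true and the others to candidates_training/false
# (when false=True); each cutout header carries the MAG, MAGERR, FILTER,
# CANDID and EDGE keywords consumed later by `convert`.
#
#   subimage("./simulations", training=True, size=32, radius=1, false=True)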
def sim(datapath, filenames, Ntrans=50, size=48, magrange=[14, 22],
        gain=None, magzp=30):
    """Insert point sources in real images."""
    filenames = np.atleast_1d(filenames)

    # simdir = os.path.join(datapath, "simulation")
    simdir = datapath
    mkdir_p(simdir)

    cutsize = np.array([size, size], dtype=np.int32)
    hcutsize = cutsize // 2

    # Lists to store the positions of the simulated transients
    trans_pix = []
    trans_wcs = []
    filelist = []
    maglist = []
    filterlist = []

    cpsf1 = np.zeros((cutsize[1], cutsize[0]))
    counter = 0
    # PSF and weight maps are also included in filenames, so skip them;
    # only the actual science images receive simulated stars.
    for filename in filenames:
        if "psf" not in filename and "weight" not in filename:
            name = os.path.basename(filename)
            hdusi1 = fits.open(filename, memmap=False)
            headi1 = hdusi1[0].header
            band = str(headi1["FILTER"])
            ima1 = hdusi1[0].data.astype(np.float32)

            hdusp1 = fits.open(
                os.path.splitext(filename)[0] + "_psf.fits", memmap=False)
            headp1 = hdusp1[0].header
            step1 = headp1["PSF_SAMP"]
            nb_psf_snaps = int(headp1["PSF_NB"])
            psfs1 = hdusp1[0].data.astype(np.float32)

            imsize = ima1.shape
            posfac = np.array([nb_psf_snaps, nb_psf_snaps]) / imsize
            w = wcs.WCS(headi1)

            # Try to use the GAIN keyword from the header
            if gain is None:
                try:
                    gain = headp1["GAIN"]
                except BaseException:
                    print("GAIN keyword not found in header, set to 1.0.")
                    gain = 1.0

            # Add the transients to the image
            pos = np.zeros((Ntrans, 2), dtype=float)
            for j in range(Ntrans):
                # newfile = os.path.join(simdir, os.path.splitext(name)[0]
                #                        + "_" + str(counter) + ".fits")
                # Keep the same name; the input image is overwritten.
                newfile = filename
                filelist.append(os.path.abspath(newfile))
                filterlist.append(band)

                # Draw a random position, far enough from the image edges
                pos[j] = np.random.random_sample(2) * (imsize - cutsize) \
                    + cutsize / 2.0
                # Store pixel and sky positions
                ra, dec = w.wcs_pix2world(pos[j][1], pos[j][0], 1)
                trans_pix.append(pos[j])
                trans_wcs.append([ra, dec])

                # Get pixel indices in the image
                ipos = pos[j].astype(int)
                # Pixel range of the subimage centered on the object;
                # the same range would apply to the weight maps.
                iposrange = np.s_[
                    ipos[0] - hcutsize[0]: ipos[0] + hcutsize[0],
                    ipos[1] - hcutsize[1]: ipos[1] + hcutsize[1],
                ]

                # Select the PSF corresponding to the image area, in case
                # more than one PSF was estimated per axis.
                if nb_psf_snaps == 1:
                    psf1 = psfs1
                else:
                    # Position with respect to the number of PSF snapshots
                    ppos = (pos[j] * posfac).astype(int)
                    psf1 = psfs1[ppos[0], ppos[1]]

                # step1 is the PSF_SAMP parameter from PSFEx, used in mat1
                # to rescale the PSF (which has a different sampling than
                # the image) to the image grid. The object is placed at
                # the center of the cutout.
                mat1 = np.array([
                    [step1, 0., hcutsize[0] - psf1.shape[0] * step1 / 2.],
                    [0., step1, hcutsize[1] - psf1.shape[0] * step1 / 2.],
                ])
                # Transform (resample) the PSF
                cpsf1 = cv2.warpAffine(
                    psf1, mat1, cpsf1.shape, flags=cv2.INTER_LANCZOS4)

                # Draw the object magnitude from the predefined range
                mag = np.random.uniform(
                    low=magrange[0], high=magrange[1], size=(1,))
                maglist.append(mag[0])
                # Convert the magnitude to ADU using the zeropoint
                # magnitude. The zeropoint is arbitrarily set to 30, as it
                # is only needed to draw random magnitudes; the proper
                # value for a given telescope could be estimated instead.
                # 0.921034 ≈ ln(10) / 2.5, so this is
                # 10**((magzp - mag) / 2.5).
                amp1 = np.exp(0.921034 * (magzp - mag))

                # Apply Poisson noise to the simulated object
                noisy_object1 = cpsf1 * amp1
                noisy_object1[noisy_object1 < 0] = 0
                noisy_object1 = np.random.poisson(noisy_object1)
                noisy_object1 = noisy_object1 / gain

                ima1[iposrange] += noisy_object1

            hdusi1[0].data = ima1
            # Write the new FITS file
            hdusi1.writeto(newfile, overwrite=True)
            hdusi1.close()
            hdusp1.close()
            counter += 1

    xypos = np.array(trans_pix)
    wcspos = np.array(trans_wcs)
    idx = np.arange(len(xypos))
    table = Table(
        [
            idx,
            filelist,
            xypos[:, 1],
            xypos[:, 0],
            wcspos[:, 0],
            wcspos[:, 1],
            maglist,
            filterlist,
        ],
        names=["idx", "filename", "Xpos", "Ypos",
               "RA", "Dec", "mag", "filter"],
    )
    table.write(
        os.path.join(simdir, "simulated_objects.list"),
        format="ascii.commented_header",
        overwrite=True,
    )
    return table
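# The magnitude-to-flux conversion used in `sim` follows from the
# zeropoint definition mag = magzp - 2.5 * log10(flux), i.e.
# flux = 10**((magzp - mag) / 2.5) = exp((ln(10) / 2.5) * (magzp - mag)),
# with ln(10) / 2.5 ≈ 0.921034. A quick sanity check:
#
#   import numpy as np
#   magzp, mag = 30.0, 20.0
#   assert np.isclose(np.exp(0.921034 * (magzp - mag)),
#                     10 ** ((magzp - mag) / 2.5))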
def substraction(filenames, reference, config, soft="hotpants",
                 method="individual", doMosaic=False, verbose="NORMAL",
                 outLevel=1, nb_threads=8):
    """Subtract a reference image from the input image."""
    imagelist = np.atleast_1d(filenames)
    subFiles = []
    for ima in imagelist:
        # Create a folder for the subtraction results
        path, filename = os.path.split(ima)
        if path:
            folder = path + "/"
        else:
            folder = ""
        resultDir = folder + "substraction/"
        mkdir_p(resultDir)

        # Get the sky coordinates of the input image corners
        im_coords = get_corner_coords(ima)

        # Define the reference image
        if reference == "ps1":
            _, band, _ = get_phot_cat(ima, None)
            # Map the photometric band onto the closest PS1 band
            if band == "B":
                band = "g"
            elif band == "V":
                band = "g"
            elif band == "R":
                band = "r"
            elif band == "I":
                band = "i"
            elif band == "g+r":
                band = "r"
                # band = 'g'
            ps1_cell_table = ps1_grid(im_coords)
            # Get the PS1 files with which to perform the subtraction
            subfiles = prepare_PS1_sub(
                ps1_cell_table, band, ima, config,
                verbose=verbose, method=method)

        regis_info = registration(
            subfiles, config, resultDir=resultDir,
            reference=reference, verbose=verbose)

        if soft == "hotpants":
            subFiles = hotpants(regis_info, config, verbose=verbose,
                                nb_threads=nb_threads)

        # Create a mosaic of all subtracted images when
        # method == 'individual'
        if method == "individual" and doMosaic:
            subfiles = np.array(subFiles)
            # Mosaic of the input files
            sublist = [i for i in subfiles[:, 0]]
            outName = os.path.splitext(filename)[0] + "_mosaic"
            create_mosaic(sublist, ima, resultDir, outName,
                          config=config, verbose=verbose)
            # Mosaic of the PS1 reference files
            sublist = [i for i in subfiles[:, 1]]
            outName = os.path.splitext(filename)[0] + "_mosaic_ps1"
            create_mosaic(sublist, ima, resultDir, outName,
                          config=config, verbose=verbose)
            # Mosaic of the subtracted files
            sublist = [i for i in subfiles[:, 2]]
            outName = os.path.splitext(filename)[0] + "_mosaic_sub"
            create_mosaic(sublist, ima, resultDir, outName,
                          config=config, verbose=verbose)
            # Mosaic of the masks applied to the subtracted files
            # (actually not really needed)
            sublist = [i for i in subfiles[:, 3]]
            outName = os.path.splitext(filename)[0] + "_mosaic_sub_mask"
            create_mosaic(sublist, ima, resultDir, outName,
                          config=config, verbose=verbose)

        # Delete files if requested, mainly to save disk space.
        # Caveat: deleted files still appear in the output tables, but the
        # user can no longer inspect some that might be important.
        # NOTE: the names below (refim, refim_mask, ima_regist, ...) are
        # expected to come from the registration step; as written they are
        # not defined in this scope.
        if outLevel == 0:
            # rm_p(ima)
            rm_p(refim)
            rm_p(refim_mask)
            rm_p(ima_regist)
            rm_p(refim_regist)
            rm_p(refim_regist_mask)

    return subFiles
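# Usage sketch for `substraction` (hedged: the image path and the config
# dictionary are hypothetical). With reference="ps1", the Pan-STARRS1
# cells covering the image footprint are prepared as references before
# hotpants is run on the registered pairs.
#
#   sub_files = substraction("Test/image.fits",
#                            reference="ps1",
#                            config=config,
#                            soft="hotpants",
#                            method="individual",
#                            doMosaic=True)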
def train(path_cube, path_model, modelname, epochs, frac=0.1, dropout=0.3):
    """Train the CNN with simulated data."""
    # Number of GPUs to use; -1 means single-device training
    gpus = -1

    path_model = os.path.join(path_model, "CNN_training/")
    mkdir_p(path_model)

    # Fraction of data used for the validation set
    fract = frac
    # Dropout probability of each dropout layer
    dprob = np.array([dropout, dropout, dropout])
    # Padding mode: "valid" or "same"
    padding = "same"
    # Output name for the trained model
    model_name = os.path.join(path_model, "%s.h5" % modelname)

    print("Loading " + path_cube + " ...", end="\r", flush=True)
    data = np.load(path_cube)
    ima = data["cube"]
    lab = keras.utils.to_categorical(data["labels"])
    mag = data["mags"]
    errmag = data["errmags"]
    band = data["filters"]
    cand_ids = data["candids"]
    nclass = lab.shape[1]
    n = ima.shape[0]
    nt = int(n * fract)

    print("Shuffling data ...", end="\r", flush=True)
    randomize = np.arange(n)
    np.random.shuffle(randomize)
    ima = ima[randomize]
    lab = lab[randomize]
    mag = mag[randomize]
    errmag = errmag[randomize]
    band = band[randomize]
    cand_ids = cand_ids[randomize]

    print("Splitting dataset ...", end="\r", flush=True)
    # Training set
    imal = ima[nt:]
    labl = lab[nt:]
    magl = mag[nt:]
    errmagl = errmag[nt:]
    bandl = band[nt:]
    cand_idsl = cand_ids[nt:]
    # Validation set
    imat = ima[:nt]
    labt = lab[:nt]
    magt = mag[:nt]
    errmagt = errmag[:nt]
    bandt = band[:nt]
    cand_idst = cand_ids[:nt]

    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(64, (3, 3), activation="elu",
                                  padding=padding,
                                  input_shape=ima.shape[1:]))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.AveragePooling2D(pool_size=(2, 2)))
    # model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[0]))
    model.add(keras.layers.Conv2D(128, (3, 3), activation="elu",
                                  padding=padding))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[1]))
    model.add(keras.layers.Conv2D(256, (3, 3), activation="elu",
                                  padding=padding))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[1]))
    model.add(keras.layers.Conv2D(256, (3, 3), activation="elu",
                                  padding=padding))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(dprob[2]))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(512, activation="elu"))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dropout(dprob[2]))
    model.add(keras.layers.Dense(32, activation="elu"))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dropout(dprob[2]))
    model.add(keras.layers.Dense(nclass, activation="softmax"))
    model.summary()

    if gpus > 0:
        parallel_model = multi_gpu_model(model, gpus=gpus)
        parallel_model.compile(
            loss="categorical_crossentropy",
            optimizer=keras.optimizers.Adam(lr=0.001),
            # optimizer=keras.optimizers.Nadam(),
            metrics=["accuracy"],
        )
        parallel_model.fit(
            imal, labl,
            batch_size=1024,
            epochs=epochs,
            verbose=1,
            validation_data=(imat, labt),
        )
        score = parallel_model.evaluate(imat, labt, verbose=0)
        # save() does not work on a multi_gpu_model
        # parallel_model.save(model_name)
        labp = parallel_model.predict(imat)
    else:
        model.compile(
            loss="categorical_crossentropy",
            optimizer=keras.optimizers.Adam(lr=0.001),
            # optimizer=keras.optimizers.Nadam(),
            metrics=["accuracy"],
        )
        # log = keras.callbacks.ModelCheckpoint(
        #     'callbacks.h5', monitor='val_loss', verbose=0,
        #     save_best_only=True, save_weights_only=False,
        #     mode='auto', period=1)
        # log = keras.callbacks.TensorBoard(
        #     log_dir='./logs', histogram_freq=5, batch_size=1024,
        #     write_graph=True, write_grads=False, write_images=False,
        #     embeddings_freq=0, embeddings_layer_names=None,
        #     embeddings_metadata=None, embeddings_data=None,
        #     update_freq='epoch')
        model.fit(
            imal, labl,
            batch_size=1024,
            epochs=epochs,
            verbose=1,
            validation_data=(imat, labt),
        )
        score = model.evaluate(imat, labt, verbose=0)
        labp = model.predict(imat)

    # Save the base model (works for both single- and multi-GPU training)
    model.save(model_name)

    # ROC curves on the validation set, per magnitude bin
    trange = np.arange(0.5, 1.0, 0.0001)
    fig, ax = plt.subplots()
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    mag_min = np.min(magt[magt != 99])
    mag_max = np.max(magt[magt != 99])
    for maglim in np.linspace(mag_min, mag_max, 6):
        labpm = labp[magt < maglim]
        labtm = labt[magt < maglim]
        labpf = labpm[labtm[:, 1] <= 0.5]
        labpt = labpm[labtm[:, 1] > 0.5]
        tpr = [np.mean(labpt[:, 1] > t) for t in trange]
        fpr = [np.mean(labpf[:, 1] > t) for t in trange]
        plt.plot(fpr, tpr, label="mag < %.2f" % maglim)
    ax.legend(loc="lower right")
    plt.savefig(os.path.join(path_model, modelname + "_ROC_mag.png"))
    # plt.show()

    # ROC curves per magnitude-error bin
    fig, ax = plt.subplots()
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    errmag_min = np.min(abs(errmagt[errmagt != 0]))
    errmag_max = np.max(abs(errmagt[errmagt != 0]))
    for errmaglim in np.linspace(errmag_min, errmag_max, 6):
        labpm = labp[errmagt < errmaglim]
        labtm = labt[errmagt < errmaglim]
        labpf = labpm[labtm[:, 1] <= 0.5]
        labpt = labpm[labtm[:, 1] > 0.5]
        tpr = [np.mean(labpt[:, 1] > t) for t in trange]
        fpr = [np.mean(labpf[:, 1] > t) for t in trange]
        plt.plot(fpr, tpr, label="errmag < %.2f" % errmaglim)
    ax.legend(loc="lower right")
    plt.savefig(os.path.join(path_model, modelname + "_ROC_errmag.png"))
    # plt.show()

    # ROC curves per photometric band
    fig, ax = plt.subplots()
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    for b in ["g", "r", "i", "z"]:
        labpm = labp[bandt == b]
        labtm = labt[bandt == b]
        labpf = labpm[labtm[:, 1] <= 0.5]
        labpt = labpm[labtm[:, 1] > 0.5]
        tpr = [np.mean(labpt[:, 1] > t) for t in trange]
        fpr = [np.mean(labpf[:, 1] > t) for t in trange]
        plt.plot(fpr, tpr, label="%s" % b)
    ax.legend(loc="lower right")
    plt.savefig(os.path.join(path_model, modelname + "_ROC_band.png"))
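# Usage sketch for `train` (hedged: the paths are hypothetical).
# `path_cube` points at the .npz file written by `convert`; the trained
# model is saved as <path_model>/CNN_training/<modelname>.h5 together
# with the ROC plots.
#
#   train(path_cube="./simulations/datacube/cube_32x32.npz",
#         path_model="./simulations",
#         modelname="mymodel",
#         epochs=20,
#         frac=0.1,
#         dropout=0.3)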